*combine MA file with baseline variables, school attendance, and outcome data


* Run everything relative to the clean-data folder.
cd "$data_clean"

* Section switches, one per stage of the build. The guards visible in this
* file run a stage only when its switch equals 1.
* identify MA cohorts
global sample 1
* basic sims process
global sims_ma 1
* baseline demographics
global baselinevars 1
* identify attendance
global sims_years 1
* generate HS grad
global hsgrad 1
* add college outcomes
global nsc 1

* cohorts: restrict the data in memory to students whose projected
* 12th-grade year (proj_year12) lies in 2004-2018 inclusive. Observations
* with a missing proj_year12 are dropped too.
* Any earlier definition is dropped first so the do-file can be re-run in the
* same Stata session without stopping on "cohorts already defined".
capture program drop cohorts
program define cohorts
	keep if inrange(proj_year12, 2004, 2018) // relevant sample
end


*identify middle school and above
* Builds the cohort roster. Every student (sasid) observed in grades 4-12 in
* the end-of-year files for 2002-2019 gets a projected 12th-grade year, and
* each yearly file is then restricted to those students.
* Paths use forward slashes throughout: Stata accepts them on every platform
* and they cannot be mistaken for an escape in front of a macro.
if $sample == 1 {
	local years 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19

	* One (sasid, grade, year) extract per school year, grades 4-12 only.
	foreach y of local years {
		use sasid grade using "$raw/saves/end`y'.dta", clear
		gen year = 20`y'
		keep if grade >= 4 & grade <= 12
		duplicates drop
		save "$raw/saves/grade`y'_ma.dta", replace
	}

	* Stack exactly the extracts written above. The earlier version started
	* from gradeoct02_ma.dta, which this section never creates, and hid
	* append failures behind capture; a missing year now stops the run.
	clear
	foreach y of local years {
		append using "$raw/saves/grade`y'_ma.dta"
	}
	format sasid %14.0f
	save "$raw/saves/appended_ma.dta", replace

	* Keep each student's first observed year ...
	bys sasid: egen min = min(year)
	keep if year == min
	duplicates drop
	*keep youngest grade
	bys sasid: egen ming = min(grade)
	keep if grade == ming

	* Projected 12th-grade year: one more year for every grade still to go.
	* Defined only for whole grades 4 through 12, as before.
	gen proj_year12 = year + (12 - grade) if inlist(grade, 4, 5, 6, 7, 8, 9, 10, 11, 12)
	keep sasid proj_year12
	duplicates drop
	save "$raw/saves/proj_year12_ma.dta", replace

	*Get SIMS data for the sasids
	* keep(3) retains only records whose sasid is on the roster.
	foreach y of local years {
		use "$raw/saves/proj_year12_ma.dta", clear
		merge 1:m sasid using "$raw/saves/end`y'.dta", keep(3) nogen
		save "$raw/saves/end`y'_ma.dta", replace
	}
}
if $sims_ma == 1{

#delimit;
* Reduce each yearly extract to one record per student-school pair and save it with the suffix _man;
foreach yy in 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 {;
	use "$saves/end`yy'_ma.dta", clear;
	drop *name rfe;
	capture drop posthsplan;
	format sasid %12.0f;
	* dup is the number of other records sharing the same sasid and school;
	duplicates tag sasid school, gen(dup);
	* original note - among duplicates only one case carries all info, so drop the copies with no race;
	drop if dup >= 1 & race == .;
	* a few remain - keep unduplicated records and, within a school, the record(s) with the highest attendance;
	bysort sasid school: egen maxattend = max(attend);
	keep if dup == 0 | attend == maxattend;
	* drop exact copies first, then force a single row per sasid-school pair;
	duplicates drop;
	duplicates drop sasid school, force;
	drop maxattend dup;
	compress;
	sort sasid school;
	save "$saves/end`yy'_man.dta", replace;
};
#delimit;
* Stack the de-duplicated yearly files, 2002 first and then 2003 through 2019;
use "$saves\end02_man.dta", clear;
foreach yy in 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 {;
	append using "$saves\end`yy'_man";
};

compress;
sort sasid year;
format sasid %13.0f;

* For 2002 and 2003 rebuild the school code as org_code times 10000 plus the last three digits of the reported code;
gen blah = school if inlist(year, 2002, 2003);
tostring blah, replace;
*keep last 3 digits;
quietly replace blah = substr(blah, -3, .);
destring blah, replace;
quietly replace blah = org_code*10000 + blah;
quietly replace school = blah if inlist(year, 2002, 2003);
drop blah;

* From here on the school code is called masscode and org_code is called district;
rename school masscode;
rename org_code district;
*drop time;
duplicates drop;

**deal with race variables;
* NOTE(review) - this label is defined but not attached to any variable in this section, and it has no entry for code 4 - confirm intended;
label def race 1 "Other Race" 2 "Asian" 3 "Black" 5 "White" 99 "Hispanic" ;

* Translate race63 into the coding used by race. Values matching no rule stay missing;
gen raceFIX = .;
* other race - codes 5 through 63;
replace raceFIX = 1 if inrange(race63, 5, 63);
* hispanic - codes 33 to 37 overwrite the other-race assignment just made;
replace raceFIX = 99 if inlist(race63, 33, 34, 35, 36, 37);
* white;
replace raceFIX = 5 if race63 == 1;
* black;
replace raceFIX = 3 if race63 == 2;
* asian;
replace raceFIX = 2 if race63 == 3;
* native american;
replace raceFIX = 4 if race63 == 4;

* For years 2006 and later race is replaced by the translated race63 value;
replace race = raceFIX if year >= 2006;
drop race63 raceFIX;

save "$saves/prelabel_ma.dta", replace;

use "$saves/prelabel_ma.dta", clear;

#delimit;
* Turn the lookup file of school codes and names into a value label called school and save a labelled copy;
preserve;

use "$raw\School\schoollab13.dta", clear;
levelsof value, local(codes);
capture label drop school;
foreach code of local codes {;
	* text holds the name or names recorded for this code, already quoted by levelsof;
	levelsof label if value == `code', local(text);
	label define school `code' `text', add;
};
label values value school;
save "$saves\schoollab13_c.dta", replace;

restore;

* Merge the lookup onto the data by school code, keeping every record in memory whether or not it matched;
rename masscode value;
capture drop _merge;
merge m:1 value using "$saves\schoollab13_c.dta", nogenerate keep(master match);
label values value school;
rename value masscode;
drop label;

* Value label city - codes 1 to 352 mapped to town names and attached to town_res;
* Written as one command with every quoted name kept whole on a single line, so no label text depends on how wrapped lines are joined;
label define city
	1 "Abington" 2 "Acton" 3 "Acushnet" 4 "Adams"
	5 "Agawam" 6 "Alford" 7 "Amesbury" 8 "Amherst"
	9 "Andover" 10 "Arlington" 11 "Ashburnham" 12 "Ashby"
	13 "Ashfield" 14 "Ashland" 15 "Athol" 16 "Attleboro"
	17 "Auburn" 18 "Avon" 19 "Ayer" 20 "Barnstable"
	21 "Barre" 22 "Becket" 23 "Bedford" 24 "Belchertown"
	25 "Bellingham" 26 "Belmont" 27 "Berkley" 28 "Berlin"
	29 "Bernardston" 30 "Beverly" 31 "Billerica" 32 "Blackstone"
	33 "Blandford" 34 "Bolton" 35 "Boston" 36 "Bourne"
	37 "Boxborough" 38 "Boxford" 39 "Boylston" 40 "Braintree"
	41 "Brewster" 42 "Bridgewater" 43 "Brimfield" 44 "Brockton"
	45 "Brookfield" 46 "Brookline" 47 "Buckland" 48 "Burlington"
	49 "Cambridge" 50 "Canton" 51 "Carlisle" 52 "Carver"
	53 "Charlemont" 54 "Charlton" 55 "Chatham" 56 "Chelmsford"
	57 "Chelsea" 58 "Cheshire" 59 "Chester" 60 "Chesterfield"
	61 "Chicopee" 62 "Chilmark" 63 "Clarksburg" 64 "Clinton"
	65 "Cohasset" 66 "Colrain" 67 "Concord" 68 "Conway"
	69 "Cummington" 70 "Dalton" 71 "Danvers" 72 "Dartmouth"
	73 "Dedham" 74 "Deerfield" 75 "Dennis" 76 "Dighton"
	77 "Douglas" 78 "Dover" 79 "Dracut" 80 "Dudley"
	81 "Dunstable" 82 "Duxbury" 83 "East Bridgewater" 84 "East Brookfield"
	85 "Eastham" 86 "Easthampton" 87 "East Longmeadow" 88 "Easton"
	89 "Edgartown" 90 "Egremont" 91 "Erving" 92 "Essex"
	93 "Everett" 94 "Fairhaven" 95 "Fall River" 96 "Falmouth"
	97 "Fitchburg" 98 "Florida" 99 "Foxborough" 100 "Framingham"
	101 "Franklin" 102 "Freetown" 103 "Gardner" 104 "Aquinnah"
	105 "Georgetown" 106 "Gill" 107 "Gloucester" 108 "Goshen"
	109 "Gosnold" 110 "Grafton" 111 "Granby" 112 "Granville"
	113 "Great Barrington" 114 "Greenfield" 115 "Groton" 116 "Groveland"
	117 "Hadley" 118 "Halifax" 119 "Hamilton" 120 "Hampden"
	121 "Hancock" 122 "Hanover" 123 "Hanson" 124 "Hardwick"
	125 "Harvard" 126 "Harwich" 127 "Hatfield" 128 "Haverhill"
	129 "Hawley" 130 "Heath" 131 "Hingham" 132 "Hinsdale"
	133 "Holbrook" 134 "Holden" 135 "Holland" 136 "Holliston"
	137 "Holyoke" 138 "Hopedale" 139 "Hopkinton" 140 "Hubbardston"
	141 "Hudson" 142 "Hull" 143 "Huntington" 144 "Ipswich"
	145 "Kingston" 146 "Lakeville" 147 "Lancaster" 148 "Lanesborough"
	149 "Lawrence" 150 "Lee" 151 "Leicester" 152 "Lenox"
	153 "Leominster" 154 "Leverett" 155 "Lexington" 156 "Leyden"
	157 "Lincoln" 158 "Littleton" 159 "Longmeadow" 160 "Lowell"
	161 "Ludlow" 162 "Lunenburg" 163 "Lynn" 164 "Lynnfield"
	165 "Malden" 166 "Manchester" 167 "Mansfield" 168 "Marblehead"
	169 "Marion" 170 "Marlborough" 171 "Marshfield" 172 "Mashpee"
	173 "Mattapoisett" 174 "Maynard" 175 "Medfield" 176 "Medford"
	177 "Medway" 178 "Melrose" 179 "Mendon" 180 "Merrimac"
	181 "Methuen" 182 "Middleborough" 183 "Middlefield" 184 "Middleton"
	185 "Milford" 186 "Millbury" 187 "Millis" 188 "Millville"
	189 "Milton" 190 "Monroe" 191 "Monson" 192 "Montague"
	193 "Monterey" 194 "Montgomery" 195 "Mount Washington" 196 "Nahant"
	197 "Nantucket" 198 "Natick" 199 "Needham" 200 "New Ashford"
	201 "New Bedford" 202 "New Braintree" 203 "Newbury" 204 "Newburyport"
	205 "New Marlborough" 206 "New Salem" 207 "Newton" 208 "Norfolk"
	209 "North Adams" 210 "Northampton" 211 "North Andover" 212 "North Attleborough"
	213 "Northborough" 214 "Northbridge" 215 "North Brookfield" 216 "Northfield"
	217 "North Reading" 218 "Norton" 219 "Norwell" 220 "Norwood"
	221 "Oak Bluffs" 222 "Oakham" 223 "Orange" 224 "Orleans"
	225 "Otis" 226 "Oxford" 227 "Palmer" 228 "Paxton"
	229 "Peabody" 230 "Pelham" 231 "Pembroke" 232 "Pepperell"
	233 "Peru" 234 "Petersham" 235 "Phillipston" 236 "Pittsfield"
	237 "Plainfield" 238 "Plainville" 239 "Plymouth" 240 "Plympton"
	241 "Princeton" 242 "Provincetown" 243 "Quincy" 244 "Randolph"
	245 "Raynham" 246 "Reading" 247 "Rehoboth" 248 "Revere"
	249 "Richmond" 250 "Rochester" 251 "Rockland" 252 "Rockport"
	253 "Rowe" 254 "Rowley" 255 "Royalston" 256 "Russell"
	257 "Rutland" 258 "Salem" 259 "Salisbury" 260 "Sandisfield"
	261 "Sandwich" 262 "Saugus" 263 "Savoy" 264 "Scituate"
	265 "Seekonk" 266 "Sharon" 267 "Sheffield" 268 "Shelburne"
	269 "Sherborn" 270 "Shirley" 271 "Shrewsbury" 272 "Shutesbury"
	273 "Somerset" 274 "Somerville" 275 "Southampton" 276 "Southborough"
	277 "Southbridge" 278 "South Hadley" 279 "Southwick" 280 "Spencer"
	281 "Springfield" 282 "Sterling" 283 "Stockbridge" 284 "Stoneham"
	285 "Stoughton" 286 "Stow" 287 "Sturbridge" 288 "Sudbury"
	289 "Sunderland" 290 "Sutton" 291 "Swampscott" 292 "Swansea"
	293 "Taunton" 294 "Templeton" 295 "Tewksbury" 296 "Tisbury"
	297 "Tolland" 298 "Topsfield" 299 "Townsend" 300 "Truro"
	301 "Tyngsborough" 302 "Tyringham" 303 "Upton" 304 "Uxbridge"
	305 "Wakefield" 306 "Wales" 307 "Walpole" 308 "Waltham"
	309 "Ware" 310 "Wareham" 311 "Warren" 312 "Warwick"
	313 "Washington" 314 "Watertown" 315 "Wayland" 316 "Webster"
	317 "Wellesley" 318 "Wellfleet" 319 "Wendell" 320 "Wenham"
	321 "Westborough" 322 "West Boylston" 323 "West Bridgewater" 324 "West Brookfield"
	325 "Westfield" 326 "Westford" 327 "Westhampton" 328 "Westminster"
	329 "West Newbury" 330 "Weston" 331 "Westport" 332 "West Springfield"
	333 "West Stockbridge" 334 "West Tisbury" 335 "Westwood" 336 "Weymouth"
	337 "Whately" 338 "Whitman" 339 "Wilbraham" 340 "Williamsburg"
	341 "Williamstown" 342 "Wilmington" 343 "Winchendon" 344 "Winchester"
	345 "Windsor" 346 "Winthrop" 347 "Woburn" 348 "Worcester"
	349 "Worthington" 350 "Wrentham" 351 "Yarmouth" 352 "Devens"
	;
label values town_res city;

* Recode gender from 1 and 2 to 1 = male and 0 = female, then label it;
replace gender = 0 if gender == 2;
label def gender 1 "Male" 0 "Female";
label values gender gender;
* female is left missing when gender is missing, as the race, lunch and special-ed dummies further down this file are.
  Previously a missing gender was silently coded 0, which made the later female~=. completeness check vacuous;
g byte female = (gender == 0) if !missing(gender);

label def grade -1 "Pre-K" 0 "Kindergarten" 1 "1st" 2 "2nd" 3 "3rd" 4 "4th" 5 "5th" 6 "6th" 7 "7th"
	8 "8th" 9 "9th" 10 "10th" 11 "11th" 12 "12th" 13 "Post-Sec";
label values grade grade;
sort sasid masscode;

* Temporarily derive a town code from the school code, only to build the urban flag.
  It is every digit of masscode except the last four, for codes of 5 to 7 digits;
* NOTE(review) - codes with 8 digits, such as 35040505, get no town code here and so urban = 0 - confirm intended;
ren town_res town_res_perm;
tostring masscode, gen(masscode2);
g town_res = substr(masscode2, 1, strlen(masscode2) - 4) if inrange(strlen(masscode2), 5, 7);
drop masscode2;
destring town_res, replace;

*Manually define urban status;
qui egen urban=anymatch(town_res), values(35 44 49 57 61 93 95 97 100 128 137 149 153 160 163 165 201
236 243 248 258 274 281 293 348);
cap drop _merge;
* Discard the derived code and restore the original town_res;
drop town_res;
ren town_res_perm town_res;
format masscode %12.0f;
save "$saves\sims_ma.dta", replace;
#delimit cr
}

* Baseline demographics: reduces sims_ma.dta to one baseline record per
* student (6th grade if available, otherwise the nearest grade in the
* priority order below), builds demographic dummies, prefixes the baseline
* variables with "baseline" and saves baselinedemos_ma.dta.
if $baselinevars == 1 {
	use "$saves\sims_ma.dta", clear

	*keep 6th grade year for baseline characteristics (or closest to it)
	* Grades are tried in priority order. Once a student has a record in a
	* tried grade, only that student's flagged records survive; students
	* with no flagged record yet keep everything until a later grade hits.
	* NOTE(review): grade 0 (labelled Kindergarten) is not in this list -
	* confirm that is intended.
	g keepflag = .
	foreach y in 6 7 8 9 5 4 3 2 1 -1 10 11 12 13 {
		replace keepflag = 1 if grade == `y'
		bys sasid: egen max = max(keepflag)
		keep if (keepflag==1 & max==1) | (keepflag==. & max==.)
		drop max
	}

	*keep first time in grade
	bys sasid grade: egen min = min(year)
	keep if year == min
	drop min

	drop if attend==0 | attend==. | attend==555
	*Keep most attended for baseline school
	bys sasid year: egen attendmax = max(attend)
	keep if attend == attendmax
	drop attendmax
	duplicates drop
	*only 20 duplicate remains (attendance ties)
	* Ties are still broken at random, but reproducibly: the data are put in
	* a fixed order on every variable kept below and the seed is fixed, so
	* repeated runs pick the same record. The seed value is arbitrary. Note
	* that set seed also resets the random-number stream for later code.
	sort sasid year attend masscode dob race low_inc town_res sped female lep
	set seed 20190101
	sample 1, count by(sasid year attend)

	keep sasid year grade masscode proj_year12 attend dob race low_inc town_res sped female lep

	*****demos*****
	* YEAR OF BIRTH
	qui g yob = year(dob) if dob != .

	* Include Native Americans in other race category (numbers low in MA pop'n)
	* RACE
	qui g byte hisp      = race==99 if race != .
	qui g byte black     = race==3 if race != .
	qui g byte white     = race==5 if race != .
	qui g byte asian     = race==2 if race != .
	qui g byte otherrace = race==1 | race==4 if race != .

	* FREE REDUCED LUNCH
	qui g byte frpl = low_inc>=1 if low_inc ~= .

	* SPECIAL ED
	qui g byte speced = sped>=1 if sped ~= .
	drop sped
	ren speced sped

	drop race low_inc

	* Boston charter flag. 35040505 is included so this list matches the
	* bcharter list used in the sims_years section of this file.
	qui egen bcharter=anymatch(masscode), ///
		values(4810550 4390050 4280305 4440205 4120530 ///
		4490305 4160305 4280305 4100205 4840505 4190305 ///
		4900305 4370505 4380505 4690505 4520505 4220505 ///
		4420050 4750505 4110305 4800405 4620305 4430205 ///
		4570205 4630205 4070405 4110305 4590305 4170205 ///
		4650105 35040505)

	*boston
	* 1 for school codes 350000-359999 or a Boston charter, 0 for any other
	* non-missing school code.
	qui gen boston = 0 if masscode ~= .
	qui replace boston = 1 if (masscode>=350000 & masscode<=359999) | bcharter==1

	* 1 only when every baseline demographic is non-missing, otherwise missing.
	g hasbaselinedemos=1 if yob~=.&hisp~=.& black~=.& white~=.& asian~=.& otherrace~=.&female~=.&frpl~=.& sped ~=.&lep~=.&year~=.

	g inbostonbaseline=1 if bcharter==1|(masscode>=350000 & masscode<=359999)
	g inmabaseline=1

	* Prefix the baseline measures so they cannot clash with other variables.
	foreach v of varlist year grade masscode yob attend dob hisp black white asian otherrace frpl ///
		town_res bcharter sped female lep {
		ren `v' baseline`v'
	}

	compress
	save "$data_clean/baselinedemos_ma.dta", replace
}

if $sims_years == 1{
				
*CHARTER YEARS
use "$saves/sims_ma.dta", clear 
keep sasid  year grade  attend masscode proj_year12 district
duplicates drop
sort sasid year grade

g ma = 1

g bps=district==35

drop district
				*bcharter (includes charters in Boston city limits))
					/*
					Excel Academy Charter 			4100205 
					Academy Of the Pacific Rim 		4120530 
					Boston Preparatory Charter 		4160305 
					Smith Leadership Academy Charter4190305 
					Roxbury Charter High  			4220505 
					Edward Brooke Charter 			4280305 
					City On A Hill Charter			4370505 
					City on a Hill Charter II	   35040505 open 2014
					Codman Academy Charter 			4380505 
					Conservatory Lab Charter 		4390050 open 2014
					Frederick Douglass Charter 		4420050 
					Neighborhood House Charter 		4440205 
					Boston Collegiate Charter 		4490305 
					Health Careers Academy HMCS		4520505 
					Dudley Street Neighbor          4070405 (opened in Sep.2012)
					Bridge Boston Dorchester		4170205
					Match Community Day charter     4650105 (opened in Sep.2011)
					MATCH (MATCH MS & HS)       	4690505 
					Boston Renaissance Charter 		4810550 
					Roxbury Preparatory Charter		4840505 
					*(This above becomes the mass code for all UCS: RoxPrep, Dorchester Prep, and Grove Hall in Sp 2012 lottery)
					Uphams Corner Charter (District)4900305 
					Dorchester collegiate academy 	4750505
					UP Academy of Boston 			4800405
					KIPP Academy Boston 			4630205 
					Brooke Charter School 2 Mattapan4430205
					Ed Brooke 3 East Boston  		4570205
					Boston Green Academy (BGA) 		4110305
					Excel 3 Boston Orient Heights	4590305
				
			*Additional charters outside of Boston
				Four Rivers Charter   				   	   4130505
				Berkshire Arts and Technology Charter 	   4140305
				Academy of Strategic Learning HMCS 		   4150505
				Christa McAuliffe Regional Charter 		   4180305
				Benjamin Banneker Charter 			  	   4200205
				Barnstable HMCS 				   		   4230010
				Marstons Mills East HM Charter 			   4270010
				Kipp Academy Lynn Charter 			   	   4290010
				Advanced Math and Science Academy Charter  4300305
				Cape Cod Lighthouse Charter 			   4320530
				Champion Charter  				  	       4340505
				Murdoch Middle Public Charter 			   4350305
				Community Charter School of Cambridge  	   4360305
				Community Day Charter Public Prospect      4400205
				Community Day Charter Gateway              4260205 (opened Sep 2012)
				Community Day Charter Kingman Webster	   4310205 (opened Sep 2012)
				Sabis International Charter 			   4410505
				Abby Kelley Foster Regional 			   4450105
				Foxborough Regional Charter    			   4460550
				Benjamin Franklin Classical Charter 	   4470205
				Hilltown Cooperative Charter Public  	   4500105
				Robert M. Hughes Academy Charter  		   4510550
				Holyoke Community Charter  			   	   4530005
				Lawrence Family Development Charter 	   4540205
				Hill View Montessori Charter Public  	   4550050
				Lowell Community Charter Public 		   4560050
				Lynn Community Charter				   	   4600105
				Marblehead Community Charter 			   4640305
				Martha's Vineyard Charter 			       4660550
				MAGNET: Ma Academy for Math and Science    4680505
				Mystic Valley Regional Charter 			   4700105
				New Leadership HMCS 				       4710405
				New Bedford Global HMCS  			       4720515
				North Central Charter Essential School     4740505
				Silver Hill Horace Mann Charter 		   4770010
				Francis W. Parker Charter Essential 	   4780505
				Pioneer Valley Performing Arts Charter     4790505
				River Valley Charter 				       4820050
				Rising Tide Charter Public 			       4830305
				Salem Academy Charter 				       4850485
				Seven Hills Charter 				       4860105
				Prospect Hill Academy Charter   		   4870550
				South Shore Charter 				       4880550
				Sturgis Charter Public 				       4890505
				Atlantis Charter  				           4910550
				Martin Luther King Jr. Charter School 	   4920005
				Phoenix Charter Academy 			       4930505
				Pioneer Charter School of Science   	   4940205
				Global Learning Charter 			       4960305
				Pioneer Valley Chinese Immersion Charter   4970205
				Innovation Academy 					       4350305 (Murdoch Middle charter previously)
				Hampden charter school of science          4990305
				Boston Green academy                       4110305
				UP academy                                 4800405
				Grove hall preparatory                     4620305 (use Uncommon 4840505 in 2013 - )
				Spirit of knowledge charter school 		   4760505
				Salem Community Charter school             4670505
				Gloucester Community arts                  4480205 (opened in Sep.2010)
				Uncommon Schools                           4840505 (opened in Sep.2012)
				Dorchester collegiate 					   4750505
				Match Community Day charter                4650105 (opened in Sep.2011)
				Excel Chelsea 							   4610405 (opened in Sep.2011) 
  				*/

			* charter = 1 for school codes 4000000-4999999 or the single code 35040505, else 0.
			* NOTE(review): 35040505 is larger than 16,777,216, beyond which a float
			* no longer holds every integer exactly - confirm masscode is stored as
			* long or double, otherwise the equality tests on that code cannot match.
			g charter = ( masscode >=4000000&masscode<=4999999) | masscode== 35040505
			*spedalt charters
			* these three codes are switched off here and appear in the spedalt list below
			replace charter = 0 if  masscode==4240505|masscode==4150505|masscode==4580505
			
			*excludes spedalt charters
			* bcharter = 1 when masscode is any code in the list, else 0.
			* Two codes (4280305 and 4110305) are listed twice, which is harmless.
			qui egen bcharter=anymatch(masscode), ///
				values(4810550 4390050 4280305 4440205 4120530 ///
				4490305 4160305 4280305 4100205 4840505 4190305 ///
				4900305 4370505 4380505 4690505 4520505 4220505 ///
				4420050 4750505 4110305 4800405 4620305 4430205 ///
				4570205 4630205 4070405 4110305 4590305 4170205 ///
				4650105 35040505) 
				
					* charters that are not on the bcharter list
					g charter_notbos=charter-bcharter
		
*******lotteried charters*******
			*exclude charters we don't have lottery information for or who are disqualified
			*DEFINE STUDY SAMPLE HERE -- right now all lotteried charters
			*APR 4120530
			*BosCol 4490305
			*Boston Green Academy 4110305
			*Boston Prep 4160305
			*CoaH  4370505
			*CoaH II 35040505
			*Codman 4380505
			*EdBrooke (not 2 and 3) 4280305
			*Excel (not 3) 4100205
			*Match HS 4690505
			*Match MS 4690505
			*RoxPrep (not Uncommon or Grove Hall or DP) 4840505
			* blottocharter = 1 for the school codes in the lotteried-charter list above, else 0.
			g blottocharter = inlist(masscode, 4840505, 4690505, 4100205, ///
				4280305, 4380505, 4370505, 4160305, 4110305, 4490305, 35040505, 4120530)
			
			*LTO sample
			g bltocharter = inlist(masscode, 4690505, ///
				4380505, 4370505, 4160305, 4490305, 4120530)
			
			* urbancharter starts as the lotteried Boston charters and adds three more codes.
			g urbancharter=blottocharter
			*global and kipp lynn
						*	Salem Academy Charter 				       4850485
			* Fix: masscode was missing as the first inlist() argument, so the test
			* compared the constant 4960305 with the other two constants, was always
			* false, and none of these three schools was ever flagged.
			replace urbancharter=1 if inlist(masscode, 4960305, 4290010, 4850485)
			
			*	Four Rivers Charter   				   	   4130505
			*	Cape Cod Lighthouse Charter 			   4320530
			*	Marblehead Community Charter 			   4640305
			*	Francis W. Parker Charter Essential 	   4780505
			*	Pioneer Valley Performing Arts Charter     4790505
			*	Rising Tide Charter Public 			       4830305
			*	Sturgis Charter Public 				       4890505
			* Innovation Academy 					       4350305 (Murdoch Middle charter previously)

			* NOTE(review): 4850485 (Salem Academy) is in this list but not in the
			* comment list just above, and it is now also flagged urbancharter -
			* confirm which group it belongs to.
			g nonurbancharter=inlist(masscode, 4350305, 4890505, 4850485, ///
				4830305, 4780505, 4640305, 4320530, 4130505, 4790505)
				
******* end lotteried charters*******
				
			*exam
			*MA Academy of Math and Science 4680505 is a public magnet school
			qui egen  exam=anymatch(masscode), values(350560 350545 350575 4680505)
			 
			*spedalt;
			*Let's move the following charters to alternative schools:
			*Boston Day and Evening 4240505
			*Egelston  
			*Amesbury Charter       4150505 
			*Lowell Middlesex 		4580505
			* two virtual schools    39010900 39020900
			qui egen  spedalt=anymatch(masscode), ///
				values(4150505 4580505 350543 1670815 9500203 1510805 ///
				260805 9500245 2070802 9500442 460850 460875 1130803 ///
				300805 9500430 2440810 9500403 1980805 9500404 3320805 ///
				1130812 260870 1220805 400807 1570805 490811 500820 ///
				2100810 2070801 1550806 350725 9500417 1440815 ///
				2880810 3430820 1550810 2880815 9500454 1020805 ///
				2810850 2100835 100815 1680805 9500418 2570805 ///
				1470810 1130810 500815 9500458 9500450 9500443 1850805 ///
				1370840 1820805 950820 490815 9500407 3480820 ///
				100820 3300840 1700820 400820 9500456 9500412 2100823 ///
				9500463 1800810 70830 1520810 1520805 350738 9500428 ///
				1210850 350749 460811 500825 3480960 2810845 350758 /// 
				2010820 9500402 2050810 3190805 9500415 300920 ///
				9500510 410810  3070840 1000845 2070867 9500270 560845 ///
				1530840 3070830 350768 400840 3320830 440860 ///
				550805 400812 1510810 1860820 2920813 3470830 ///
				3480865 830850 2810816 1370825 310805 3260830 ///
				2100818 2760820 9500462 3320815 1550815 960810 ///
				3140890  9500427 90850 3080815 1470830 2390815 1000860 ///
				9500455 1370835 1600920 2610805 3080850 2010845 350809 ///
				2700850 1150805 2540810 270880 200850 1820830 1810812 ///
				950940 9500460 210810 2920810  2920820 2390805 350791 ///
				2440840 9500268 560835 20830 860817 610845 1520840 670802 ///
				1990820 1700803 9500429 3250840 1020825 1590815 2880805 ///
				9500461 9500420 4240505 350543 350580 350588 350036 ///
				350363 350413 350548 350750 350518 351001 351001 1601003 ///
				7731001 1811001 3251004 3251004 1861001 6401002 6151001 ///
				3471001 2581001 7751001 1821001 6801001 1261002 201001 ///
				1011001 6001001 441001 301001 201003 2431004 351002 351002 ///
				4241001 4811001 401002 401001 441004 571002 491001 ///
				1371001 2071002 2071002 6951001 351016 571001 611001 ///
				351003 571003 1141001 351009 3481007 2091001 351018 ///
				351018 6401001 1361001 2091002 951004 951004 1761001 711001 ///
				6451001 351004 951003 351005 6051002 351006 1001002 2431003 ///
				6251001 6251001 961001 3481002 971001 8151002 7551003 ///
				1031002 441002 2441002 2441002 3481004 3481004 2431005 ///
				8151001 521001 351010 351010 1261001 1281001 2361001 ///
				1491002 4531001 1811002 7401001 3251003 7551001 2431002 ///
				441005 1511002 351007 1601002 1601002 4581001 ///
				6001002 1651001 1671002 1671001 1701001 1721001 1601001 ///
				1601001 351011 7551004 3361001 7551004 6831001 1971001 ///
				1981001 3481006 2431006 2431001 351017 351017 351008 ///
				6741001 2651001 651001 2141002 2141002 2011001 2011001 ///
				2291001 4931001 2391001 6351001 6601001 3311001 2261001 ///
				201002 441003 1031001 3481001 231001 2441001 1511001 ///
				951001 1001001 951002 7651001 6751001 3481005 1491001 ///
				2641001 2481001 6581001 571005 6051001 2781001 2071001 ///
				2811001 351014 721001 351013 3251001 1051001 1601004 ///
				2141001 3041001 7001001 501001 2121001 2121001 6221001 ///
				1851001 6701001 3101001 3101001 3321001 3361002 3361002 ///
				3361002 3361003 7801001 351015 3481003 3481003 101001 571004 ///
				351012 50010101 50030101 50640101 50060101 50040101 50930101 ///
				50490101 50050101 50050102 50070102 50070101 50130102 50080101 ///
				50080102 50100101 50100102 50420101 50420102 50640201 50130101 ///
				50130103 50140101 50200101 50200201 50200202 50200203 50210101 ///
				50250102 50250101 50240101 50240102 50410101 50410201 ///
				50110101 50980101 50260101 50260102 50270101 50450101 ///
				50020101 50850101 50850102 50290101 50290102 50290103 ///
				50300101 50300102 50310101 50330101 50330102 50820101 ///
				50340101 50350101 50360101 50360102 50090101 50380101 ///
				50390101 50390102 50490301 50610201 50610101 50710101 ///
				50400101 50400102 50460101 50460102 50590101 50480101 ///
				50190101 50370101 50500101 50510101 50530101 50530102 ///
				50540101 50550101 50550102 50580101 50570101 50230101 ///
				50440201 50440202 50470101 50470102 50620101 50620102 ///
				50620401 50620301 50620302 50630101 50980201 50490601 ///
				50650101 50650102 50160101 50120101 50190201 50680101 ///
				50700101 50670101 50280101 50690101 50690103 50690102 ///
				50690104 50690105 50170101 50490401 50750101 50760101 ///
				50760102 50760105 50760106 50760107 50760108 50760103 ///
				50760104 50770101 50780101 50780102 50820201 50790101 ///
				50800101 50730101 50810101 50890101 50180101 50850201 ///
				50860101 50410301 50220101 50220102 50220103 50830101 ///
				50830102 50840101 50870101 50880101 51000100 50490201 ///
				50440101 50440102 50440102 50440301 50440302 50560101 ///
				50560102 50560104 50560103 50490501 50720101 50720102 ///
				50910101 50910102 50560201 50930202 50930201 50940101 ///
				50940102 50600101 50950101 50960101 50970101 1670815 ///
				1510805 260805 2070802 460850 460875 1130803 300805 2440810 ///
				400807 1980805 3320805 1210850 260870 1220805 1570805 ///
				2100810 500820 2070801 1550806 350725 1440815 2880810 ///
				3430820 1550810 2880815 1020805 2810850 2100835 1410850 ///
				100815 1680805 2570805 1130810 1850805 1370840 1820805 ///
				950820 490815 3480820 100820 3300840 1700820 400840 2100823 ///
				70830 1520810 350749 460811 500825 3480960 2810845 350758 ///
				2010820 2050810 300920 410810 3070840 2070867 560845 ///
				1530840 3070830 350768 3320830 440860 1510810 1860820 ///
				2920813 3470830 3480865 830850 2810816 1370825 310805 ///
				3260830 2100818 2760820 3320815 1550815 960810 3140890 ///
				90850 3080815 1980830 1470830 2390815 1000860 1600920 ///
				2610805 3080850 2010845 350809 2700850 1150805 2540810 ///
				270880 200850 1820830 1810812 950940 210810 2920810 ///
				3480802 2920820 2390805 1520805 350791 1000845 2440840 ///
				20830 860817 610845 670802 1990820 1700803 3250840 ///
				1020825 1590815 2880805 300865 300870 350759  ///
				690805 3280820 1700805 790850 1660810 710860 1370845 ///
				750805 400850 1860825  39010900 39020900)
				* every masscode in the 50000000-59999999 range is also flagged as spedalt
				qui replace spedalt=1 if inrange(masscode, 50000000, 59999999)

			* boston indicator: 1 for masscodes 350000-359999 or any bcharter school,
			* 0 for every other non-missing masscode, missing otherwise
			qui gen boston = (inrange(masscode, 350000, 359999) | bcharter==1) if masscode != . | bcharter==1
			
		* ---------------------------------------------------------------------
		* Collapse the attendance records to one school per student-year.
		* Steps: drop unusable records, count schools per student-year-grade,
		* prefer charter records over regular ones, then keep the most-attended
		* record and break any remaining ties at random.
		* ---------------------------------------------------------------------
		*deal with duplicates/repeaters etc
		* drop records with missing, zero, or 555 days attended
		drop if attend==.|attend==0|attend==555
		* restrict to grades 4 through 12
 			drop if grade>=13|grade<4

		*number of schools that student has attended in year and grade
		egen sasidXyearXgrade = group(sasid year grade)
		* NOTE(review): -unique- is not a built-in command (presumably the SSC package); confirm it is installed
		quietly unique masscode, by(sasidXyearXgrade) generate(unique)
		qui egen numschools=sum(unique), by(sasidXyearXgrade)
		drop unique sasidXyearXgrade
		
		*assign school within a year
*Assign students to most attended school
		duplicates report sasid year grade
		

		*KEEP OBS BASED ON SCHOOL ASSIGNMENT;
		*for students with no special schools, keep most attended
		*For students who attend charter, keep charter
		* special = 1 when any record in the student-year-grade cell is a charter record
		bys sasid year grade:  egen special=max(charter)

		bys sasid year grade: egen maxattend=max(attend)
		qui gen keepflag=1 if maxattend==attend
		qui replace keepflag=1 if special==1
		*keep most attended and keep all records of those who have attended special schools
		keep if keepflag==1
		* NOTE(review): "max" is a variable abbreviation that resolves to maxattend here;
		* this line fails if variable abbreviation is switched off (set varabbrev off)
		drop max keepflag

		*keep charter school if other school is regular
		drop if charter==0 &(special==1)
		
		*now keep most attended special within a year
		* maxattend is rebuilt at the student-year level (grade no longer part of the cell)
		bys sasid year :  egen  maxattend=max(attend)
		keep if attend==maxattend
		*this also drops people with two grades in one year and assigns to most attended

		*now only attendance ties left!
		*D.SUN 12/9/2013: seed already set on top of this do file.
		* NOTE(review): the tie-break below is random; no -set seed- is visible in the
		* opening lines of this file, so confirm a seed is set before this point
		sample 1, count by(sasid year)

		drop max* special    
		* NOTE(review): masscode is listed twice in the next command; the second mention looks unintended
		order sasid year grade masscode attend charter masscode 
		sort sasid year
		qui compress
		
		
		* Repeater counts: number of records a student has in a grade, minus one.
		bys sasid grade: gen repeats = _N - 1
		label var repeats "number of additional years in grade"


		* Same count restricted to records in each school type; records outside the
		* school type (flag not equal to 1) get zero.
		foreach flag in ma charter blottocharter bltocharter bcharter charter_notbos urbancharter nonurbancharter{
			bys sasid grade `flag': gen `flag'_repeats = cond(`flag'==1, _N - 1, 0)
			label var `flag'_repeats "number of additional years in grade in `flag'"
		}

		* Keep only the earliest year observed for each student-grade.
		bys sasid grade (year): keep if year == year[1]
		
		
		* Store the long file (one row per student-grade), then build the wide file
		* (one row per student, one set of columns per grade).
		save "$data_clean/sims_ma_long.dta", replace
	
		use "$data_clean/sims_ma_long.dta", clear
		
		* variables that are spread across grades by the reshape
		local by_grade year masscode attend ma bcharter  bltocharter blottocharter ///
			charter charter_notbos urbancharter nonurbancharter exam spedalt ///
			boston numschools *repeats bps
		reshape wide `by_grade' , i(sasid ) j(grade) 

		save "$data_clean/sims_ma_wide.dta", replace

}
if $hsgrad == 1{
* Load the enrollment-status file and attach each student's projected grade-12 year.
* keep(2 3) retains matched students plus cohort students with no status records.
use "$saves/full_hs_file.dta", clear
	* forward slashes: same file is written/read with "/" elsewhere in this do-file,
	* and "\" paths do not resolve on macOS/Unix
	merge m:1 sasid using "$raw/saves/proj_year12_ma.dta" , keep(2 3) nogen
format sasid %12.0f

keep sasid proj_year12 enstat grade t period year 
* -cohorts- (defined at the top of this file) restricts to proj_year12 2004-2018
cohorts
sort sasid t grade
duplicates drop
  
*drop duplicated status in the same year-file for observations not at end
*the following 5 lines of code potentially affect how transferred etc. is coded
* tag > 0 marks student-period cells with more than one record
duplicates tag sasid t, gen(tag)
tab tag
bys sasid: egen maxt = max(t)
* in multi-record cells before the student's last period, keep only the
* enrolled ("01") and graduated ("04") records
bys sasid: drop if (tag > 0 & enstat ~= "01" & enstat ~= "04" & t < maxt)
drop maxt  
  
*Code status for exit
* One status per student-period (sasid, t) cell. Categories are tried in the
* order below and a cell keeps the first category for which any of its
* records carries a matching enstat code.
gen status=""

*Graduates
bys sasid t: egen hit = max(enstat=="04")
replace status="graduated" if hit==1 & status==""
drop hit

*Deceased
bys sasid t: egen hit = max(enstat=="06")
replace status="deceased" if hit==1 & status==""
drop hit

*Enrolled
bys sasid t: egen hit = max(enstat=="01")
replace status="enrolled" if hit==1 & status==""
drop hit

*Transferred
bys sasid t: egen hit = max(enstat=="02" | enstat=="07" | enstat=="08" | enstat=="20" | enstat=="21"| enstat=="22" | enstat=="23" | enstat=="24" | enstat=="41")
replace status="transferred" if hit==1 & status==""
drop hit

*Completed without graduating
bys sasid t: egen hit = max(enstat=="11" | enstat=="10")
replace status="completed without grad" if hit==1 & status==""
drop hit

*Reached maximum age
bys sasid t: egen hit = max(enstat=="09")
replace status="max age" if hit==1 & status==""
drop hit

*Dropouts
bys sasid t: egen hit = max(enstat=="03" | enstat=="30" | enstat=="31" | enstat=="32" | enstat=="33"| enstat=="34" | enstat=="35" | enstat=="36")
replace status="dropout" if hit==1 & status==""
drop hit

*Permanent exclusion
bys sasid t: egen hit = max(enstat=="05")
replace status="permanent exclusion" if hit==1 & status==""
drop hit

*Not enrolled but receiving special services
bys sasid t: egen hit = max(enstat=="40")
replace status="not enrolled special service" if hit==1 & status==""
drop hit
	
* The duplicate-cell marker (tag) is not used past this point.
* A former diagnostic here built three helper variables (x, xx, mng) to compare
* transfer and enrollment grades, but dropped them without tabulating or
* asserting anything, so it had no effect and has been removed.
drop tag
   
	 
	 ***************************************************************
	 ** the following set of code generates the variables we want **
	 ***************************************************************
	
	
* collapse exact duplicates, then keep only the fields needed for the
* graduation outcomes (enstat has been summarized into status)
duplicates drop	
keep sasid proj_year12 grade t period year status  	

*gen graduation status vars
* Each indicator is 1 for every record of a student who has at least one
* "graduated" record inside the corresponding window, 0 otherwise.

* 4-year: end-of-year graduation record no later than the projected grade-12 year
bys sasid: egen hsgrad_4yr = max(status == "graduated" & ((proj_year12 >= year & period == "eoy")))

* 5-year: any record through the projected year, or an end-of-year record one year later
* (a rule blanking this indicator for the 2018 cohort is disabled in the original)
bys sasid: egen hsgrad_5yr = max(status == "graduated" & ((proj_year12 >= year)|(proj_year12==year & period =="oct")|(proj_year12 +1 >= year & period=="eoy")))

* 6-year: additionally an October record one year later, or an end-of-year record two years later
* (a rule blanking this indicator for the 2017 cohort is disabled in the original)
bys sasid: egen hsgrad_6yr = max(status == "graduated" & ((proj_year12 >= year)|(proj_year12+1==year & period =="oct")|(proj_year12 +2 >= year & period=="eoy")))


*SRC edit
*actual graduation date
* gradyear is the file year of a "graduated" record and missing for all other records
g gradyear=year if status=="graduated"
	// Define HS graduation date as June 30th of that year.
	// mdy() builds the daily date directly from the numeric year (missing year ->
	// missing date), replacing the former tostring / date() / destring round trip.
	g hsgraddate = mdy(6, 30, gradyear)
	format hsgraddate %td

* Reduce to the student-level graduation variables and remove exact duplicates.
keep sasid gradyear  hsgraddate proj_year12 hsgrad* 
duplicates drop

*deal with dups
*these are expected due to multiple hits in the file 

* s = number of extra rows the student still has
bys sasid: gen s = _N - 1
tab s

* among students with several rows, discard the rows without a graduation date
drop if s!=0 & hsgraddate==.
drop s

* a few dups remain -- take earlier grad date
bys sasid: gen s = _N - 1
tab s
bys sasid (gradyear): keep if gradyear==. | gradyear==gradyear[1]
drop s
*egen transf_deceased_ontime = rowmax(deceased_ontime transferred_ontime)
*egen transf_deceased_withintwo = rowmax(deceased_withintwo transferred_withintwo)


save "$data_clean/hsgrad_ma.dta", replace


}

if $nsc == 1 {

***************
// TIME HORIZON FOR OUTCOMES
***************
// Defines, for each outcome year y = 1..n_years_out, globals holding the first
// and last week of the "in year y" windows and of the cumulative "by year y"
// windows. Each global stores the text of a yw() expression written in terms
// of the variable cohort; the expression is evaluated only where the global is
// later substituted into a command.
local weeksPerYear = 52
global n_years_out = 8

forvalues y = 1/$n_years_out {
	
	// the annual outcomes begin in July and end in the following June
	// full year y: week 27 of cohort+y-1 through week 26 of cohort+y
	// (`=`y'-1' is evaluated immediately, so the stored text contains the number)
	global week_start_inY`y'	"yw(cohort + `=`y'-1', (`weeksPerYear'/2) + 1)"
	global week_end_inY`y'		"yw(cohort + `y', 	(`weeksPerYear'/2))"
	// fall of year y ends in week 52 of cohort+y-1 (it shares the full-year start)
	global week_end_inY`y'fall	"yw(cohort + `y'-1, 	`weeksPerYear')"
	// spring of year y: week 1 through week 26 of cohort+y
	global week_start_inY`y'spring	"yw(cohort + `y',  1)"
	global week_end_inY`y'spring	"yw(cohort + `y', (`weeksPerYear'/2))"
	
	// cumulative outcomes include all windows from start of Y1 through the relevant date
	// every "by" window, including the spring one, starts at the start of year 1
	global week_start_byY`y'	${week_start_inY1}
	global week_end_byY`y'		${week_end_inY`y'}
	global week_end_byY`y'fall	${week_end_inY`y'fall}
	global week_end_byY`y'spring	${week_end_inY`y'spring}
	global week_start_byY`y'spring	${week_start_inY1}

}
// The list below is read with ";" as the command delimiter so that it can span
// several lines; the delimiter is switched back to carriage return afterwards.
#delimit ;

// college categories
global collegeCategories  
    any
    public private
    4yr 2yr
    MA MA_public 
	out OOS_public
	4yrpub 4yrMApub 4yrOOSpub 4yrpriv 4yrMApriv 4yrOOSpriv
	2yrpub 2yrMApub 2yrpriv
;
#delimit cr



// load data
// NSC enrollment records restricted to students in the MA cohort file
use "$data_clean/NSC_enrollment.dta", clear
* forward slashes: the same file is referenced with "/" elsewhere in this do-file,
* and "\" paths do not resolve on macOS/Unix
merge m:1 sasid using "$raw/saves/proj_year12_ma.dta", keep(match ) nogen
replace college = trim(college)
* the projected grade-12 year is the cohort used by the outcome windows
ren proj_year12 cohort
*a few won't match because the subcampus doesn't match to college data, fix that here
* sub = main-campus version of the ID (first six characters plus "-00")
g sub = substr(ID_FSC,1,6) +"-00"
foreach f in 001072-05 001536-01 001540-01 001564-01 002029-01 002029-03 002155-08 002155-10 002589-03 ///
	002629-02  003388-01  009828-12 010198-17 010394-11 010727-11 030106-18 002077-02 030425-01 ///
	001047-04 001047-18 001047-54 001305-03 001445-01 001499-01 001499-06 001571-13 001580-07 001580-16 002077-01 002155-02 002155-06 002155-09 ///
	002155-11 002155-12 002219-01 002219-02 002325-03  002484-44 002573-03 002589-01 002607-01 002626-00 002629-23  002666-01 002677-00 002678-01 ///
	002678-02 002678-03 002678-05 002678-10 002782-02 002783-00 002785-20 002803-01 002823-02 002869-01 002878-01 003184-01 003404-09 003404-10 003410-01 ///
	003549-01  003632-04 003634-10 003726-02 003938-02 004075-01 004075-03 004586-16 004619-01 004625-00 004729-05 004949-00 005306-00 006622-00 006799-00 006911-02 007329-07  008423-01 008694-01 008694-08 008694-11 008694-22 009157-05 009183-00 009267-01 009621-04 009621-06 009740-00 009747-01 009747-02 009828-02 010198-04 010198-05 010198-09 010198-13 010434-00 010727-05 010727-12 010727-18 010727-19 010727-20 011109-02 011123-01 011621-01 011941-01 012393-00 012393-01 012393-02 012393-09 012842-00 012873-02 020690-00 020739-00 020744-00 020748-00 020754-00 020774-00 020839-00 021000-00 021077-00 021108-00 021136-00 021136-13 021160-00 021163-00 021206-00 021466-00 021618-00 021664-00 021664-01 021664-04 021664-18 021664-38 021664-41 021707-00 021799-02 021799-47 022187-00 022187-09 022187-11 022195-00 022260-00 022375-00 022460-00 022606-00 022606-01 022606-02 022606-04 022613-03 023058-01 023058-05 023058-07 023058-08 023058-11 023413-00 024544-00 025039-00 025086-00 026167-06 026175-08 030106-20 030106-21 030219-02 030314-03 030704-02 030723-03 030734-01 030764-02 030875-05 030876-04 030908-00 031913-00 036914-00 038813-00 042443-00 042534-00 003010-04	001479-02	022779-00	003256-01	002894-01	004072-15	001417-06	004729-04	001982-00	002669-00	001417-09	003024-01	030106-14	020653-00	002894-01	001305-01	004729-04	025590-00	001402-01	003379-06	002155-05	003010-05	002699-01	021415-01	002894-02	006911-01	006679-00	020758-00	002520-01	002657-01	010198-14	025306-00	033743-00	002784-00	030106-12	021715-00	002254-00	004729-06	001499-02	002477-00	002632-01	001397-28	011220-00	040963-00	002886-00	002155-04	009748-01	001746-17	002825-01	002791-02	002784-00	022606-03	001536-02	010198-06	002651-01	010727-06	001479-02	002878-03	007649-00	021073-00	022425-00	001531-01	002678-07	003691-10	020705-00	003947-00	021366-00	030837-02	010198-02	004951-00	021415-02	031713-03	025349-00	001534-08	001499-04	021191-00	001397-26	022444-00	002903-03	021664-28	001417-07	001473-00	010395-01	
002077-03	004952-00	010098-00	021078-00	001855-00	001305-02	003506-00	001433-02	004882-00	010363-00	022827-00	002823-03	042087-00	001479-02	003530-06	001417-08	002894-01	026175-07	004729-06	011732-00	021553-00	003938-11	021799-07	022285-00	004587-01	001767-00	003674-03	021664-44	005273-00	010727-14	001499-08	021006-07	024824-00	003938-05	003535-02	010727-22	023001-03	001317-03	013103-00	002974-01	004507-00	042797-00	001499-02	021664-08	020653-00	007327-01	001499-02	003010-02	010727-04	010727-02	021799-43	022828-00	025256-00	011109-01	021553-00	003938-08	002126-02	026062-02	021584-00	008329-05	011649-01	020757-00	003938-09	010198-11	030837-02	008694-20	020705-00	006725-00	021805-00	021446-00	002116-00	035453-00	002629-01	025340-00	010727-25	030876-02	001329-01	022187-12	030375-01	025998-05	005541-00	001848-00	004553-01	004765-11	025306-00	030876-05	002833-00	009079-06	007234-01	002894-01	042542-00	003897-06	001090-02	010674-00	021799-29	004898-04	006771-01	022220-00	030876-03	003897-04	022187-07	001698-00	020522-00	004646-04	021553-06	022180-00	003938-07	025412-00	002978-05	003379-07	002393-01	001953-00	031713-00	021799-41	005022-00	005022-00	023058-06	023286-00	003313-01	002978-06	102579-12	009054-00	020735-00	003051-43	010198-07	002109-01	001464-00	001047-05	023219-00	025998-01	002784-00	009748-04	002457-00	023598-04	002329-01	030106-02	021664-43	012393-03	009085-00	011940-04	003077-02	002903-01	021799-28	021209-00	021584-14	021553-07	020681-00	012300-02	030106-11	042421-00	022913-00	021664-07	020705-00	021664-02	031121-01	004898-06	009256-00	009748-02	001499-10	003223-02	022472-00	010198-11	005541-05	030876-01	010198-02	010627-02	010851-01	009621-11	030106-04	030106-22	021799-21	021891-01	021891-00	021830-00	021006-11	031121-09	021553-00	025340-00	025054-00	005537-01	024821-00	007912-00	021799-03	010198-14	005252-01	001580-01	038533-00	002963-00	023482-00	001682-00	011858-02	025693-03	002978-03	008694-12	030106-24	030723-04	003938-06	002823-01	021160-06	021799-18	
042118-00	021799-05	010997-01	021891-00	031713-05	022809-00	025943-01	023482-00	023058-09	003125-02	008175-03	003897-12	004075-02	042485-00	008878-05	001100-03	021799-01	031713-05	041144-00	005753-01	010198-06	041426-01	020653-00	001772-10	041434-00	010198-15	010727-28	001568-00	008443-03	010727-48	002477-00	024600-00	005498-00	021553-00	004951-00	020682-00	004811-02	021584-07	003125-01	008694-18	004586-17	002903-03	001499-08	030764-04	023485-00	031121-07	021651-00	031713-04	021651-01	005263-03	004646-01	003549-15	021603-05	001164-53	022631-00	031713-00	021596-00	008878-06	020637-00	003379-06	009621-08	023286-02	004586-17	003562-00	020653-00	023058-12	025554-00	002657-03	021553-07	002678-04	022916-00	007085-01	002678-16	002678-06	030888-00	004999-00	021891-01	010674-00	010394-01	008303-02	026167-03	010394-03	022865-02	003044-00	041314-00	010394-04	009828-11	010198-12	004586-15	002484-43	042510-00	002393-01	003938-11	009924-01	004503-02	002474-00	021664-22	003125-04	004951-00	021400-00	023610-02	001948-02	006689-00	003897-13	025693-02	023610-01	002678-11	001813-01	009828-04	021006-00	021830-00	042636-00	031733-01	021661-00	022631-04	020907-00	042186-00	010198-18	003691-10	023001-04	036983-04	005541-00	004553-04	021664-45	021799-39	042568-00	008423-02	009088-03	021799-09	002474-01	002155-01	020653-00	031713-00	030106-27	025593-18	021664-42	021142-00	004507-00	003090-03	008441-01	005541-10	001317-03	009270-04	002678-14	030106-29	022060-00	001900-14	031713-00	020757-00	025590-00	005273-00	030718-03	041314-00	022205-00	037513-02	023598-02	030764-05	023062-00	005365-00	004934-01	008363-00	022985-00	023053-00	021603-05	002484-43	002903-03	022985-00	036983-08	003090-04	005699-00	003023-05	003897-11	021891-03	022613-02	002149-00	005541-00	005498-00	001295-00	001827-01	022779-00	004999-00	001499-04	004977-00	023598-03	010727-09	004503-02	004586-15	010198-06	011940-02	001060-01	010394-09	022916-03	007329-05	020652-00	021584-00	010727-13	001982-00	021006-00	002477-00	020758-00	
002669-00	002784-00	003100-08	020652-01	003897-01	002260-14	021891-03	001499-10	002678-08	009828-04	021584-10	008329-02	012873-01	023001-04	020746-00	001580-15	009748-07	002484-45	003938-08	030106-25	021464-00	002632-49	002484-43	009058-68	021891-02	001767-00	011220-00	022060-00	010098-00	002886-00	020705-00	021366-00	004882-00	021006-00	030032-01	042295-00	008090-03	010394-09	010198-19	005263-00	009621-05	002394-01	008694-06	041612-00	011166-04	010148-02	021108-02	022706-00	021553-00	004889-01	009748-01	022706-00	002484-45	008443-02	002393-01	020771-00	010674-00	003938-05	002254-00	021073-00	021603-00	022827-00	021078-00	004951-00	022180-00	010041-01	023001-05	003726-04	025998-05	023217-00	022506-02	002474-00	009975-00	020550-00	002474-00	026164-01	003938-09	021799-44	022202-00	001072-01	004646-00	003938-07	003897-13	031268-00	024824-00	002077-64	003259-08	021799-08	010529-00	023217-03	001329-01	002477-00	001881-03	021603-05	003632-08	003615-01	042797-00	001533-02	020522-00	004507-00	021584-00	021446-00	021891-00	005498-00	007327-05	021664-34	009088-04	003292-00	035103-00	030106-07	004642-07	001499-07	003077-01	006565-00	022699-00	003897-04	002441-01	010727-07	010491-01	009932-05	001060-01	003364-00	010860-00	003897-13	004646-06	021160-11	022392-03	003897-10	021875-00	003125-02	010394-10	025784-00	003090-02	009085-00	020705-00	004646-00	010674-00	007649-00	004952-00	010860-00	006725-00	023465-00	006679-00	022220-00	005273-00	004952-00	002249-01	025083-01	002474-00	025693-07	004508-25	003897-02	020681-00	042634-00	022209-00	008694-09	004765-09	009748-04	007085-11	004765-10	001534-09	005541-06	003509-03	022209-00	001047-10	007234-02	002607-02	003549-17	004646-05	009923-01	002385-05	031713-03	002393-01	003897-05	002441-01	004617-05	009270-03	010198-16	031203-01	030734-02	003634-13	008694-16	025943-02	040513-01	001441-02	001060-02	008694-17	003938-06	002666-05	002607-02	007329-03	008694-02	010142-05	001047-06	021891-00	005263-00	025349-00	020705-00	024824-00	038533-00	
021191-00	005498-00	021805-00	010851-00	006477-00	030888-00	011732-00	021584-00	003947-00	023001-00	040963-00	025340-00	002784-00	020917-00	021891-00	033743-00	042087-00	001698-00	021218-01	003726-08	010198-07	042788-00	010832-00	001499-07	004951-00	022779-00	010040-02	022950-02	002678-09	010727-51	010727-17	004553-02	010963-00	003897-07	010727-24	021515-00	042544-00	005614-00	021006-08	025349-03	003897-11	006477-00	020552-00	002441-01	011940-01	003225-01	002484-45	003749-06	025943-04	021651-04	021584-17	022865-04	001746-04	010198-14	023522-00	020682-00	021553-00	022828-00	021553-00	021603-00	022285-00	020653-00	035453-00	021603-00	001473-00	001767-00	023001-00	042542-00	001464-00	042797-00	022472-00	022202-00	020681-00	023219-00	021006-00	025256-00	035453-00	042485-00	031713-00	021515-00	041672-00	041144-00	002784-00	021108-01	021603-06	010727-23	010394-07	021891-02	003634-08	025184-00	030874-02	023610-04	002607-02	031121-10	011940-03	004617-05	022795-00	004619-02	003674-06	010727-49	021474-00	021584-01	002014-09	010394-03	007669-00	020758-00	041426-01	006901-04	012393-03	004009-01	002607-21	009923-01	010724-01	030106-26	004503-01	002484-02	021004-00	004503-01	008441-03	003530-06	001060-10	030106-30	003897-09	010394-04	005384-00	001580-05	005492-00	030723-04	004642-06	006577-00	003703-07	003046-30	004508-25	001767-00	010394-10	001691-01	001205-29	002494-00	005022-00	021004-00	004951-00	001568-00	020746-00	001682-00	021553-00	022985-00	001855-00	001848-00	001767-00	021891-00	041314-00	010674-00	005335-00	023001-00	022795-00	025306-00	007912-00	021891-00	004951-00	023001-00	001698-00	010363-00	010963-00	023485-00	021553-00	021553-00	022985-00	021584-00	002833-00	013103-00	004507-00	021400-00	005022-00	031713-00	005273-00	021651-00	004009-01	001072-07	021437-00	040653-01	004617-04	003897-11	025450-00	021004-02	021651-02	001499-09	001533-01	023001-05	003379-06	001499-09	036983-01	031121-02	023058-13	003897-06	005263-02	001060-14	008694-07	009058-04	008694-03	022631-00	
022631-08	001536-02	002657-04	026062-02	021686-00	003726-09	031563-01	022708-02	005335-00	030106-29	020653-00	001047-15	004673-05	004022-00	010674-00	005335-00	010456-00	003897-10	003842-56	004618-02	001288-00	001060-02	021004-02	025450-00	020681-02	008694-14	030290-00 {
		qui replace ID_FSC= sub if ID_FSC=="`f'"
	}
* sub was only needed for the ID_FSC recode above
drop sub

* checkpoint: NSC enrollment records with main-campus IDs substituted where listed
save "$data_clean/NSC_enrollment_ma.dta", replace

* NOTE(review): the next line is a block-comment terminator with no matching
* opener visible in this section; it appears to be a leftover -- confirm and remove
*/
*** MATCH NSC ENROLLMENT SPELL DATES TO START AND END WEEKS ***

// load data
use "$data_clean/NSC_enrollment_ma.dta", clear


// merge with FSC-IPEDS crosswalk 
// a few cases don't match -- either for-profits or schools outside the US
/*
                  college |      Freq.     Percent        Cum.
----------------------------------------+-----------------------------------
                ITT TECHNICAL INSTITUTE |        250       34.20       34.20
                     QUEEN'S UNIVERSITY |        180       24.62       58.82
   AMERICAN UNIVERSITY OF THE CARIBBEAN |         90       12.31       71.14
             EVEREST COLLEGE - THORNTON |         61        8.34       79.48
                   MIDDLESEX UNIVERSITY |         52        7.11       86.59
                  KING'S COLLEGE LONDON |         31        4.24       90.83
                  UNIVERSITY OF WINDSOR |         13        1.78       92.61
      ALTIERUS CAREER COLLEGE- THORNTON |         11        1.50       94.12
ALTIERUS CAREER COLLEGE- WEST VALLEY .. |         10        1.37       95.49
     EVEREST COLLEGE - WEST VALLEY CITY |         10        1.37       96.85
                     PINCHOT UNIVERSITY |          7        0.96       97.81
             EVEREST COLLEGE- CUCAMONGA |          6        0.82       98.63
                HEALD COLLEGE - CONCORD |          5        0.68       99.32
       EVEREST INSTITUTE - GRAND RAPIDS |          2        0.27       99.59
  ALTIERUS CAREER COLLEGE- MERRILLVILLE |          1        0.14       99.73
        EVEREST INSTITUTE- MERRILLVILLE |          1        0.14       99.86
                    UNIVERSITY OF HAIFA |          1        0.14      100.00
----------------------------------------+-----------------------------------
                                  Total |        731      100.00
								  
ID_FSC	college	sasid
006679-00	QUEEN'S UNIVERSITY	180
022444-00	AMERICAN UNIVERSITY OF THE CARIBBEAN	90
004507-00	EVEREST COLLEGE - THORNTON	61
021805-00	MIDDLESEX UNIVERSITY	52
023286-00	ITT TECHNICAL INSTITUTE	52
023598-00	ITT TECHNICAL INSTITUTE	51
023219-00	ITT TECHNICAL INSTITUTE	34
021209-00	ITT TECHNICAL INSTITUTE	31
009085-00	KING'S COLLEGE LONDON	31
023610-00	ITT TECHNICAL INSTITUTE	28
022916-00	ITT TECHNICAL INSTITUTE	25
020652-00	ITT TECHNICAL INSTITUTE	17
006689-00	UNIVERSITY OF WINDSOR	13
023217-00	ITT TECHNICAL INSTITUTE	12
004507-00	ALTIERUS CAREER COLLEGE- THORNTON	11
022985-00	EVEREST COLLEGE - WEST VALLEY CITY	10
022985-00	ALTIERUS CAREER COLLEGE- WEST VALLEY CITY	10
041612-00	PINCHOT UNIVERSITY	7
022506-00	EVEREST COLLEGE- CUCAMONGA	6
021875-00	HEALD COLLEGE - CONCORD	5
021004-00	EVEREST INSTITUTE - GRAND RAPIDS	2
021004-00	EVEREST INSTITUTE- MERRILLVILLE	1
010456-00	UNIVERSITY OF HAIFA	1
021004-00	ALTIERUS CAREER COLLEGE- MERRILLVILLE	1

*/

* Attach the FSC-IPEDS crosswalk; unmatched enrollment records are kept.
merge m:1 ID_FSC using "$data_clean/FSC.dta", keep( master match  ) nogen  

// merge with IPEDS college characteristics
* Some records have IPEDS IDs but no IPEDS data -- the only ones with issues are beauty schools
merge m:1 ID_IPEDS using "$data_clean/IPEDS.dta",  /*assert(match using) */ keep(master match) 
*assert (!missing(ipeds_years) & !missing(ipeds_ownership) & !missing(ipeds_calendar))| ID_FSC=="022779-00"
keep if college!="" //only ones missing are beauty schools
* Only the merge indicator is dropped. ipeds_calendar must stay: it is filled in
* just below, asserted non-missing later, and kept (as calendar) in the output.
* Dropping it here made the following -replace- fail with "variable not found".
drop _merge

*clean up a few missings
replace ipeds_calendar=1 if ipeds_calendar == . // default to semester

* Hand-code sector variables for institutions that did not pick them up from IPEDS.
* Both groups are coded as private, non-MA ("out") colleges; they differ only in
* ipeds_years and ipeds_ownership.

*alterius/everest/itt all private 2 year for profits
foreach i in 004507-00 020652-00   021004-00  021209-00   021875-00  022916-00   023217-00   022985-00 023219-00 023286-00   005492-00 023598-00   022506-00 023610-00 {
replace ipeds_years = 2 if ID_FSC=="`i'"
replace ipeds_ownership =  0 if ID_FSC=="`i'"

foreach v of varlist ipeds_college_any ipeds_college_private ipeds_college_out{
    replace `v' = 1 if ID_FSC=="`i'"
}

foreach v of varlist  ipeds_college_public  ipeds_college_MA ipeds_college_MA_public ipeds_college_OOS_public  ipeds_college_umass_amh ipeds_college_umass_bos ipeds_college_umass_dart ipeds_college_umass_low{
    replace `v' = 0 if ID_FSC=="`i'"
}
}


*international mark as 4 year private 
* 023217-00 was previously also listed here. It is an ITT Technical Institute ID
* (see the unmatched-ID table above) and is already coded in the for-profit loop;
* listing it again silently overwrote that coding with 4-year / ownership 1.
* NOTE(review): confirm 023217-00 belongs with the for-profit group.
foreach i in 006679-00 022444-00 021805-00 009085-00 006689-00 041612-00 010456-00  022460-00  022779-00 {
replace ipeds_years = 4 if ID_FSC=="`i'"
replace ipeds_ownership =  1 if ID_FSC=="`i'"

foreach v of varlist ipeds_college_any ipeds_college_private ipeds_college_out{
    replace `v' = 1 if ID_FSC=="`i'"
}

foreach v of varlist  ipeds_college_public  ipeds_college_MA ipeds_college_MA_public ipeds_college_OOS_public  ipeds_college_umass_amh ipeds_college_umass_bos ipeds_college_umass_dart ipeds_college_umass_low{
    replace `v' = 0 if ID_FSC=="`i'"
}
}
* remaining gaps: fall back to the NSC-reported values, then default level to 4-year
replace ipeds_years = college_years if ipeds_years==.
replace ipeds_ownership = college_ownership if ipeds_ownership==.
replace ipeds_years = 4 if ipeds_years==. //grad programs
assert (!missing(ipeds_years) & !missing(ipeds_ownership) & !missing(ipeds_calendar))



// level flags from the NSC-reported college_years (missing when college_years is
// missing), after which the NSC originals are dropped
gen college_4yr = college_years == 4 if !missing(college_years)
gen college_2yr = college_years == 2 if !missing(college_years)
drop college_ownership college_years

// strip the ipeds_ prefix: ipeds_college_public -> college_public, ipeds_years -> years, ...
rename ipeds_* *
gen college_MA_private = college_private*college_MA
gen college_OOS_private = college_private*college_out

// Composite level-by-sector flags.
// Each component is compared with 1 explicitly. Stata's & treats a missing value
// as true, so the former (a & b) form set BOTH the 4yr and the 2yr composite to 1
// whenever college_4yr / college_2yr was missing (i.e. college_years missing).
// With the explicit comparison a missing component yields 0.
// NOTE(review): assumes the component flags are coded 0/1 -- confirm in IPEDS.dta.
gen college_4yrpub	= (college_4yr == 1 & college_public == 1)
gen college_4yrMApub= (college_4yr == 1 & college_MA_public == 1)
gen college_4yrOOSpub= (college_4yr == 1 & college_OOS_public == 1)
gen college_4yrpriv	= (college_4yr == 1 & college_private == 1)
gen college_4yrMApriv	= (college_4yr == 1 & college_private == 1 & college_MA == 1)
gen college_4yrOOSpriv	= (college_4yr == 1 & college_private == 1 & college_out == 1)
gen college_2yrpub	= (college_2yr == 1 & college_public == 1)
gen college_2yrMApub= (college_2yr == 1 & college_MA_public == 1)
gen college_2yrpriv	= (college_2yr == 1 & college_private == 1)
// convert dates to weeks
// creates week_start_date and week_end_date (weekly dates) from the daily term dates
foreach time in start_date end_date {
	gen week_`time' = wofd(term_`time')
	format week_`time' %tw
}

keep sasid ID_FSC ID_IPEDS college week*   enrollment ///
college_state years calendar ownership cohort college_* NSCdatayear term_start_date term_end_date
*get rid of duplicates which are same enrollment spell but different NSC data files
* (week_start / week_end abbreviate week_start_date / week_end_date)
duplicates drop sasid ID_FSC ID_IPEDS week_start week_end, force

save "${raw}/temp_nsc_enroll_ma.dta", replace

* -clear- is the documented option of -use- for discarding the data in memory
* (this line previously passed -replace-, which is an option of -save-)
use  "${raw}/temp_nsc_enroll_ma.dta", clear


// expand to generate all weeks during term for each enrollment spell - now one obs for each week
// (week_start / week_end abbreviate week_start_date / week_end_date)
sort sasid college week_start
gen spell_nsc = _n
gen n_weeks = (week_end - week_start) + 1
expand n_weeks
bysort spell_nsc: gen week = week_start + (_n - 1)
format week %tw

// flag NSC-provided terms
gen source_nsc = 1

	// if there are multiple records for a single week, default to the later record
	// b/c we think it's a revision of the first and is therefore more accurate.
	// Groups are student-college-week; within a group records are ordered by term
	// start, term end, and NSC data year, and the last one is kept. NSCdatayear
	// now sits inside the parentheses: by-syntax is "varlist1 (varlist2)", and a
	// variable after the closing parenthesis is outside that syntax.
	bysort sasid college week (term_start_date term_end_date NSCdatayear): keep if (_n == _N)

// rename variables for merging with outcomes data
keep sasid ID_FSC ID_IPEDS college week source spell enrollment ///
college_state years calendar ownership cohort college_* NSCdatayear


save "${data_clean}/nsc_enroll_cln_ma.dta", replace

*****************************************************************************************************
*****************************************************************************************************

// generate enrollment windows
// Rebuild spells from the weekly file: every run of consecutive weeks within an
// original NSC spell becomes one record with its own start week, end week and length.

use "${data_clean}/nsc_enroll_cln_ma.dta", clear

	// number the runs of consecutive weeks (subspells) inside each original spell.
	// The counter is a running sum that starts at 1 and steps up at every gap, so
	// all weeks after a gap stay in the new subspell. (The earlier version
	// initialised every row to 1 and only changed the first row after a gap, so the
	// following rows fell back into subspell 1 and its start/end spanned the gap.)
	egen ID_tsset = group(sasid college spell_nsc), missing
	bysort ID_tsset (week): gen subspell = sum(_n == 1 | week != week[_n-1] + 1)

	// update start and end weeks of sub spells
	bysort ID_tsset subspell: egen week_start = min(week)
	bysort ID_tsset subspell: egen week_end = max(week)
	gen weeks = (week_end - week_start) + 1
	format week_* %tw

// get one record for each spell
drop week ID_tsset *spell*
bysort _all: keep if (_n == 1) 
// each student-college-start-end combination must now be unique
isid sasid college week_start week_end 

**************************
// loop over time windows
**************************

// For every horizon 1..$n_years_out and both prefixes ("in"/"by"), compute the
// share of each enrollment spell's weeks that lies inside the full window and
// inside its fall and spring parts. Window bounds come from the globals
// week_start_* / week_end_*.
forvalues y = 1/$n_years_out {
foreach prefix in "in" "by" {

	// window name, e.g. inY1 or byY3
	local time `prefix'Y`y'

	foreach part in year fall spring {

		// suffix on the end-of-window global and on the generated variable
		local sfx
		if "`part'" != "year" local sfx `part'

		// suffix on the start-of-window global: the fall part is bounded below by
		// the start of the full window, only spring has its own start
		// NOTE(review): confirm the fall part should not use week_start_`time'fall
		local start_sfx
		if "`part'" == "spring" local start_sfx spring

		// variable label for this part
		local lbl "share of enrollment spell in `time' window"
		if "`part'" != "year" local lbl "share of enrollment spell in `time' `part' window"

		// first and last week the spell and the window have in common
		qui gen overlap_start 	= max(week_start, ${week_start_`time'`start_sfx'})
		qui gen overlap_end 	= min(week_end, ${week_end_`time'`sfx'})

		// weeks of overlap, floored at zero when spell and window do not intersect
		qui gen overlap = max(overlap_end - overlap_start + 1, 0)

		// scale by spell length
		qui gen spell_share_`time'`sfx' = (overlap / weeks)
		label var spell_share_`time'`sfx' "`lbl'"
		drop overlap*
	}

}
}

*****************************************************************************************************
*****************************************************************************************************

** GENERATE ENROLLMENT FLAT FILE ***

// a spell counts as attempted enrollment unless enrollment is below 3
gen term_att = !(enrollment <3)

// one attendance flag per college category and time window: the spell must be an
// attempted term, belong to the category, and overlap the window at all
forvalues y = 1/$n_years_out {
foreach prefix in "in" "by" {
	foreach time in `prefix'Y`y' `prefix'Y`y'fall `prefix'Y`y'spring {
		foreach cat of global collegeCategories {

			qui gen byte att_`cat'_`time' = term_att * college_`cat' * (spell_share_`time' > 0)

			// remember the category's variable label
			local l_`cat': var label college_`cat'
		}
	}
}
}

* Get first college attended to compare to LTO

// calendar date on which each spell starts
gen startdate = dofw(week_start)
format startdate %td

// July 1 of the cohort year is the cutoff for post-high-school enrollment
tostring cohort, gen(cohort_str)
gen hsgraddate = "7/1/"+cohort_str
gen hsgraddatetd = date(hsgraddate,"MDY")
format hsgraddatetd %td

// spells starting on or after the cutoff
g keepflag = (startdate>=hsgraddatetd)

// earliest qualifying start date overall and per institution
egen firstattdate = min(startdate) if keepflag==1, by(sasid cohort)
egen firstattdate_school = min(startdate) if keepflag==1, by(sasid cohort ID_FSC)
format firstattdate firstattdate_school %td

// the spell that opens the student's first post-cutoff enrollment
gen keepflag2 = (firstattdate_school == startdate & firstattdate_school== firstattdate)


*add opeid
g ID_FSC_firstinst= substr(ID_FSC,1,6) if keepflag2==1
g collegename_firstinst= college if keepflag2==1

keep *first* att* college_*  sasid cohort 
duplicates drop

// spread each flag to all of a student's rows: ever-attended within sasid
foreach d of varlist att* college_* {
	bys sasid: egen max = max(`d')
	replace `d'=max
	drop max
	}

	drop *date*

// Pull the first-institution identifiers onto rows where they are blank.
// Blanks sort first within sasid, so each pass copies the next row's value one
// row up and then removes rows that became identical; four passes are run.
foreach v of varlist ID_FSC *firstinst{
forvalues pass = 1/4 {
sort sasid  `v'
bys sasid : replace `v' = `v'[_n+1] if  `v'==""
duplicates drop
}
}

duplicates drop
*very few dups remain, random draw among those that do (typically same univ with slightly diff codes)
sample 1, count by(sasid)

ren collegename college_name_firstinst

*now we have a flat file

************************************************************************************	
************************************************************************************

*** FORCE WITHIN WINDOW BINARY ENROLLMENT OUTCOMES TO BE DISJOINT *** 

// For every "in" window (full year, fall, spring) zero out the lower-priority
// attendance flag whenever the higher-priority one is set.
forvalues t = 1/$n_years_out {
	foreach term in Y`t' Y`t'fall Y`t'spring {

		local w in`term'

		// 4-year wins over every 2-year flag, so 2yr means 2yr only
		foreach two in 2yr 2yrMApub 2yrpub 2yrpriv {
			replace att_`two'_`w' = 0 if att_4yr_`w'==1
		}

		// in-state wins over out-of-state
		replace att_out_`w' = 0 if att_MA_`w'==1

		// public wins over private
		replace att_private_`w' = 0 if att_public_`w'==1
		foreach pub in 4yrpub 4yrMApub {
			replace att_4yrpriv_`w' = 0 if att_`pub'_`w'==1
		}

		// MA public wins over out-of-state public
		replace att_OOS_public_`w' = 0 if att_MA_public_`w'==1
	}
}

// same priority rules for the ever-attended college_* indicators
foreach two in 2yr 2yrpub 2yrMApub 2yrpriv {
	replace college_`two' = 0 if college_4yr==1
}
replace college_out = 0 if college_MA==1
replace college_private = 0 if college_public==1
replace college_2yrpriv = 0 if college_public==1
replace college_private = 0 if (college_4yrpub==1 | college_4yrMApub==1)
replace college_OOS_public = 0 if college_MA_public==1


// tag the source, shrink storage types, and write the flat enrollment file
gen source_nsc = 1
compress
save "$data_clean/flat_enrollment_NSC_ma.dta", replace


*DEGREES

// degree types and institution categories used for the completion outcomes
local degrees 			AA BA CERT GRAD
local schooltypes		public private MA out MA_public OOS_public MA_private OOS_private

// NSC degree records, keeping only students found in the projected-grade-12 file
use "$data_clean/NSC_degrees.dta", clear
merge m:1 sasid using "$raw/saves/proj_year12_ma.dta", keep(3) nogen

// projected grade-12 year serves as the cohort
rename proj_year12 cohort

// checkpoint and reload
save "$data_clean/NSC_degrees_ma.dta", replace
use "$data_clean/NSC_degrees_ma.dta", clear
// Collapse the listed branch codes onto the main-campus code: the first six
// characters of ID_FSC followed by "-00".
// Every physical line of the code list must end in /// so that the opening
// brace stays on the same logical line as -foreach-.
g sub = substr(ID_FSC,1,6) +"-00"
foreach f in  007329-07 ///
	001072-05 001536-01 001540-01 001564-01 002029-01 002029-03 002155-08 002155-10 002589-03 ///
	002629-02  003388-01  009828-12 010198-17 010394-11 010727-11 030106-18 002077-02 030425-01 ///
	001047-04 001047-18 001047-54 001305-03 001445-01 001499-01 001499-06 001571-13 001580-07 001580-16 002077-01 002155-02 002155-06 002155-09 ///
	002155-11 002155-12 002219-01 002219-02 002325-03  002484-44 002573-03 002589-01 002607-01 002626-00 002629-23  002666-01 002677-00 002678-01 ///
	002678-02 002678-03 002678-05 002678-10 002782-02 002783-00 002785-20 002803-01 002823-02 002869-01 002878-01 003184-01 003404-09 003404-10 003410-01 ///
	003549-01  003632-04 003634-10 003726-02 003938-02 004075-01 004075-03 004586-16 004619-01 004625-00 004729-05 004949-00 005306-00 006622-00 006799-00 006911-02 007329-07  008423-01 008694-01 008694-08 008694-11 008694-22 009157-05 009183-00 009267-01 009621-04 009621-06 009740-00 009747-01 009747-02 009828-02 010198-04 010198-05 010198-09 010198-13 010434-00 010727-05 010727-12 010727-18 010727-19 010727-20 011109-02 011123-01 011621-01 011941-01 012393-00 012393-01 012393-02 012393-09 012842-00 012873-02 020690-00 020739-00 020744-00 020748-00 020754-00 020774-00 020839-00 021000-00 021077-00 021108-00 021136-00 021136-13 021160-00 021163-00 021206-00 021466-00 021618-00 021664-00 021664-01 021664-04 021664-18 021664-38 021664-41 021707-00 021799-02 021799-47 022187-00 022187-09 022187-11 022195-00 022260-00 022375-00 022460-00 022606-00 022606-01 022606-02 022606-04 022613-03 023058-01 023058-05 023058-07 023058-08 023058-11 023413-00 024544-00 025039-00 025086-00 026167-06 026175-08 030106-20 030106-21 030219-02 030314-03 030704-02 030723-03 030734-01 030764-02 030875-05 030876-04 030908-00 031913-00 036914-00 038813-00 042443-00 042534-00  001090-02 001100-03 001329-01 001397-26 001397-28 001402-01 001417-06 001417-07 001417-09 001464-00 001473-00 001479-02 001499-02 001499-04  001499-08 001499-10 001499-16 001531-01 001534-08 001568-00 001580-01 001682-00 001698-00 001746-04 001746-17 001767-00 001827-01 001848-00 001855-00 001982-00 002077-03 002109-01 002126-02 002155-05 002254-00 002393-01 002394-01 002477-00 002520-01 002632-01 002651-01 002666-05 002669-00 002678-04 002678-06 002678-07 002678-16 002699-01 002784-00 002791-02 002823-01 002825-01 002833-00 002886-00 002894-01  002894-02 002978-03 002978-05 002978-06 003010-02 003010-04 003010-05 003077-02 003223-02 003256-01 003269-00 003313-01 003352-00 003509-03 003530-06 003549-15 003634-08 003691-10 003897-04 003897-06 003897-12 003938-05 003938-07 003947-00 004072-15 004503-02 004507-00 004586-17 ///
004646-04 004729-04 004729-06 004765-11 004811-02 004882-00 004898-04 004951-00 004952-00 004999-00 005022-00 005263-00 005273-00 005335-00 005498-00 005526-00 005761-00 006477-00 006679-00 006725-00 006771-01 006911-01 007329-05 007649-00 007912-00 008090-03 008303-02 008329-05 008694-12 008694-14 008694-18 009058-00 009079-06 009085-00 009621-08 009621-11 009748-01 009748-02 009748-04 009828-04 009828-11 009932-05 010098-00 010198-02 010198-06 010198-07 010198-11 010198-12 010198-14 010198-18 010198-19 010363-00 010394-03 010394-09 010394-10 010395-01 010627-02 010674-00 010727-02 010851-01 010860-00 010963-00 010997-01 011109-01 011220-00 011649-01 011732-00 011858-02  011940-01 011940-04 013103-00 020522-00 020653-00 020681-00 020682-00 020705-00 020746-00 020757-00 020758-00 020917-00 021004-00 021006-00 021006-07 021006-11 021073-00 021078-00 021160-06 021160-11 021175-00 021191-00 021366-00 021400-00 021415-01 021415-02 021446-00 021515-00 021553-00 021553-06 021584-00 021584-10 021584-14 021603-00 021603-06 021651-01 021664-02 021664-07 021664-08 021664-22 021664-28 021664-42 021664-43 021664-44 021799-07 021799-39 021799-41 021799-43 021805-00 021830-00 021891-00 021891-01 021891-03 022060-00 022180-00 022187-07 022202-00 022220-00 022285-00 022472-00 022606-03 022779-00 022795-00 022827-00 022828-00 022985-00 023001-00 023001-03 023001-04 023058-06 023058-09 023058-12 023219-00 023462-00 023465-00 023471-00 023485-00 023522-00 024824-00 025256-00 025306-00 025340-00 025349-00 025590-00 025593-18 025693-00 025693-02 025693-03 025693-04 025693-07 025943-02 025998-05 026062-02 026167-03 026175-07 030106-04 030106-07 030106-11 030106-12 030106-14 030106-24 030106-25 030106-27 030106-29 030375-01 030723-04 030764-04 030764-05 030837-00 030837-02 030876-02 030876-03 030888-00 031121-01 031121-09 031563-01 031713-00 031713-03 031713-05 032103-09 033743-00 035453-00 037513-02 038533-00 040963-00 041144-00 041314-00 041426-01 041672-00 042087-00 042485-00 ///
042542-00 042797-00 {
		qui replace ID_FSC= sub if ID_FSC=="`f'"
	}
	drop sub 
	

// attach FSC institution characteristics; unmatched degree records are kept
merge m:1 ID_FSC using "$data_clean/FSC.dta",    keep(match master)  nogen



/*
tab ID_FSC if _merge==1, sort

    Federal |
School Code |      Freq.     Percent        Cum.
------------+-----------------------------------
  004507-00 |          6       25.00       25.00
  006679-00 |          4       16.67       41.67
  009085-00 |          4       16.67       58.33
  021805-00 |          3       12.50       70.83
  022985-00 |          2        8.33       79.17
  023219-00 |          2        8.33       87.50
  023522-00 |          2        8.33       95.83
  021004-00 |          1        4.17      100.00
------------+-----------------------------------
      Total |         24      100.00


*/

// merge with IPEDS college characteristics
*SOME HAVE IPEDS IDs but not data -- the only ones with issues are beauty schools
merge m:1 ID_IPEDS using "$data_clean/IPEDS.dta",  /*assert(match using) */ keep(master match) 
*assert (!missing(ipeds_years) & !missing(ipeds_ownership) & !missing(ipeds_calendar))| ID_FSC=="022779-00"
keep if college!="" //only ones missing are beauty schools
// Drop only the merge indicator. ipeds_calendar must stay: it is filled in on the
// next statement and asserted non-missing further down, so dropping it here
// would make both statements fail.
drop _merge

*clean up a few missings
replace ipeds_calendar=1 if ipeds_calendar == . // default to semester

// indicator sets that every manual override below switches on / off
local flags_on  ipeds_college_any ipeds_college_private ipeds_college_out
local flags_off ipeds_college_public  ipeds_college_MA ipeds_college_MA_public ipeds_college_OOS_public  ipeds_college_umass_amh ipeds_college_umass_bos ipeds_college_umass_dart ipeds_college_umass_low

*alterius/everest/itt all private 2 year for profits
foreach i in 004507-00 020652-00   021004-00  021209-00 020917-00 023522-00   021875-00  022916-00   023217-00   022985-00 023219-00 023286-00   005492-00 023598-00   022506-00 023610-00 {
	replace ipeds_years = 2 if ID_FSC=="`i'"
	replace ipeds_ownership =  0 if ID_FSC=="`i'"

	foreach v of varlist `flags_on' {
		replace `v' = 1 if ID_FSC=="`i'"
	}
	foreach v of varlist `flags_off' {
		replace `v' = 0 if ID_FSC=="`i'"
	}
}


*international mark as 4 year private 
// 023217-00 is in both lists; this later loop overrides the values set above
foreach i in 006679-00 022444-00 021805-00 009085-00 006689-00 041612-00 010456-00  022460-00  022779-00 023217-00  {
	replace ipeds_years = 4 if ID_FSC=="`i'"
	replace ipeds_ownership =  1 if ID_FSC=="`i'"

	foreach v of varlist `flags_on' {
		replace `v' = 1 if ID_FSC=="`i'"
	}
	foreach v of varlist `flags_off' {
		replace `v' = 0 if ID_FSC=="`i'"
	}
}


// verify all characteristics are populated
// fall back to the college_* values where the IPEDS values are missing
replace ipeds_years = college_years if ipeds_years==.
replace ipeds_ownership = college_ownership if ipeds_ownership==.
replace ipeds_years = 4 if ipeds_years==. //grad programs
// stop if any record still lacks years, ownership or calendar
assert (!missing(ipeds_years) & !missing(ipeds_ownership) & !missing(ipeds_calendar))

// drop existing variables
// 4-year / 2-year indicators are built from college_years (not from the filled-in
// ipeds_years) and are left missing where college_years is missing
// NOTE(review): confirm college_years rather than ipeds_years is intended here
gen college_4yr = college_years == 4 if !missing(college_years)
gen college_2yr = college_years == 2 if !missing(college_years)
drop college_ownership college_years

// strip the ipeds_ prefix from every ipeds_* variable
rename ipeds_* *
// sector-by-location and level-by-sector combinations
// NOTE(review): a missing operand makes the products below missing, while in the
// & expressions a missing value counts as true - check rows with missing college_4yr / college_2yr
gen college_MA_private = college_private*college_MA
gen college_OOS_private = college_private*college_out
gen college_4yrpub	= (college_4yr & college_public)
gen college_4yrMApub= (college_4yr & college_MA_public)
gen college_4yrOOSpub= (college_4yr & college_OOS_public)
gen college_4yrpriv	= (college_4yr & college_private)
gen college_4yrMApriv	= (college_4yr & college_private & college_MA)
gen college_4yrOOSpriv	= (college_4yr & college_private & college_out)
gen college_2yrpub	= (college_2yr & college_public)
gen college_2yrMApub= (college_2yr & college_MA_public)
gen college_2yrpriv	= (college_2yr & college_private)

	* Merge on credential levels from NSC website
	// cap the title at 60 characters and rename it to the lookup table's key
	recast str60 degree_title, force
	rename degree_title degree_title_name
	compress
	merge m:1 degree_title_name using "${data_clean}\nsc_credential_table.dta", keep(master match) nogen


//deal with degree_title
// translate the credential level code into the four degree groups
gen degree=""
replace degree="BA" if CREDENTIAL_LEVEL_CODE=="BD"
replace degree="AA" if CREDENTIAL_LEVEL_CODE=="AD"
replace degree="GRAD" if inlist(CREDENTIAL_LEVEL_CODE, "DP", "DR", "MD", "PC")
replace degree="CERT" if CREDENTIAL_LEVEL_CODE=="UC"

drop CREDENTIAL*

// 1 once a degree group has been assigned, 0 otherwise
g degree_matched = (degree!="")

*tab degree_title if degree_matched==0, sort
*tab degree_title years if degree_matched==0

// Rule chain for titles that still have no degree group. Each pair of lines
// (1) assigns a degree to not-yet-matched titles containing the pattern and
// (2) marks every title containing the pattern as matched, so later (broader)
// patterns cannot reclassify it. The order of the pairs therefore matters.
// degree_title here refers to the renamed degree_title_name via abbreviation.
// NOTE(review): patterns are regular expressions, so an unescaped "." (e.g. in
// "A.S." or "B.A.") matches any character, not only a literal period.
replace degree="BA" if regexm(degree_title, "BACHELOR")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "BACHELOR")==1
replace degree="BA" if regexm(degree_title, "BFA")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "BFA")==1
replace degree="BA" if regexm(degree_title, "BLS")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "BLS")==1
replace degree="BA" if regexm(degree_title, "ACHELOR")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "ACHELOR")==1
replace degree="AA" if regexm(degree_title, "ASSOCIATE")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "ASSOCIATE")==1
replace degree="AA" if regexm(degree_title, "A A")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "A A")==1
replace degree="AA" if regexm(degree_title, "A. S.")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "A. S.")==1
replace degree="AA" if regexm(degree_title, "A.S.")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "A.S.")==1
replace degree="AA" if regexm(degree_title, "A.PS.S")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "A.PS.S")==1
replace degree="AA" if regexm(degree_title, "AGS")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "AGS")==1
replace degree="BA" if regexm(degree_title, "B A")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "B A")==1
replace degree="BA" if regexm(degree_title, "B OF")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "B OF")==1
replace degree="BA" if regexm(degree_title, "B S")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "B S")==1
replace degree="BA" if regexm(degree_title, "B. S.")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "B. S.")==1
replace degree="AA" if regexm(degree_title, "SSOCIATE")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "SSOCIATE")==1
replace degree="AA" if regexm(degree_title, "ASSOC")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "ASSOC")==1 
replace degree="GRAD" if regexm(degree_title, "MASTER")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "MASTER")==1 
replace degree="CERT" if regexm(degree_title, "CERTIFICATE")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "CERTIFICATE")==1 
replace degree="CERT" if regexm(degree_title, "CERT")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "CERT")==1
replace degree="BA" if regexm(degree_title, "B.A.")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "B.A.")==1
replace degree="BA" if regexm(degree_title, "B.S.")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "B.S.")==1
replace degree="GRAD" if regexm(degree_title, "DOCTOR")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "DOCTOR")==1
replace degree="GRAD" if regexm(degree_title, "DOCT")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "DOCT")==1
// "MA" anywhere in the title counts as graduate unless the title contains DIPLOMA
replace degree="GRAD" if regexm(degree_title, "MA")==1 &degree_matched==0&regexm(degree_title, "DIPLOMA")==0
replace degree_matched=1 if regexm(degree_title, "MA")==1&regexm(degree_title, "DIPLOMA")==0
replace degree="BA" if regexm(degree_title, "BACH")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "BACH")==1
replace degree="BA" if regexm(degree_title, "BACCA")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "BACCA")==1
replace degree="BA" if regexm(degree_title, "UNDERGRAD")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "UNDERGRAD")==1
replace degree="GRAD" if regexm(degree_title, "M.A.")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "M.A.")==1 
replace degree="GRAD" if regexm(degree_title, "M.S.")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "M.S.")==1 
replace degree="GRAD" if regexm(degree_title, "MS")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "MS")==1 
replace degree="GRAD" if regexm(degree_title, "SM")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "SM")==1 
replace degree="GRAD" if regexm(degree_title, "AM")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "AM")==1 
// short two-letter codes come late so the longer patterns above take precedence
replace degree="BA" if regexm(degree_title, "BS")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "BS")==1
replace degree="AA" if regexm(degree_title, "AA")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "AA")==1 
replace degree="AA" if regexm(degree_title, "AS")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "AS")==1 
replace degree="BA" if regexm(degree_title, "BA")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "BA")==1
replace degree="BA" if regexm(degree_title, "BBA")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "BBA")==1
replace degree="BA" if regexm(degree_title, "B B A")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "B B A")==1
replace degree="BA" if regexm(degree_title, "AB")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "AB")==1
replace degree="BA" if regexm(degree_title, "A.B.")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "A.B.")==1
replace degree="GRAD" if regexm(degree_title, "ED.M.")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "ED.M.")==1 
replace degree="GRAD" if regexm(degree_title, "EDM")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "EDM")==1 
replace degree="GRAD" if regexm(degree_title, "J.D.")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "J.D.")==1 
// "JD" written without periods. The matched flag uses the same pattern as the
// assignment; flagging on "J." (J plus any character) would mark unrelated
// titles as matched without giving them a degree, hiding them from later rules.
replace degree="GRAD" if regexm(degree_title, "JD")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "JD")==1 
// Broad catch-all rules for titles that are still unmatched. Each pair assigns a
// degree to unmatched titles and then marks the title as matched; order matters.
// NOTE(review): "B.", "A." and "M." are regular expressions, so they match the
// letter followed by ANY character (not just a period) - confirm this is intended.
replace degree="BA" if regexm(degree_title, "B.")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "B.")==1
replace degree="BA" if regexm(degree_title, "EDUCATION")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "EDUCATION")==1
// titles containing what look like grade ranges are coded BA
replace degree="BA" if regexm(degree_title, "5-8")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "5-8")==1
replace degree="BA" if regexm(degree_title, "8-12")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "8-12")==1
replace degree="BA" if regexm(degree_title, "PK-12")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "PK-12")==1
replace degree="BA" if regexm(degree_title, "1-6")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "1-6")==1
replace degree="AA" if regexm(degree_title, "A.")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "A.")==1
replace degree="GRAD" if regexm(degree_title, "M.")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "M.")==1
replace degree="GRAD" if regexm(degree_title, "DR")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "DR")==1 
replace degree="GRAD" if regexm(degree_title, "SCHOOL PSYCHOLOGIST")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "SCHOOL PSYCHOLOGIST")==1 
// exact-title matches (whole title equals the code)
replace degree="GRAD" if (degree_title=="O.D."|degree_title=="DPT"|degree_title=="GPD"|degree_title=="D") &degree_matched==0
replace degree_matched=1 if (degree_title=="O.D."|degree_title=="DPT"|degree_title=="GPD"|degree_title=="D") &degree_matched==0
replace degree="CERT" if regexm(degree_title, "CER")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "CER")==1 
replace degree="CERT" if regexm(degree_title, "CT")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "CT")==1 
replace degree="CERT" if degree_title=="C"&degree_matched==0
replace degree_matched=1 if degree_title=="C"&degree_matched==0
replace degree="CERT" if degree_title=="GED"&degree_matched==0
replace degree_matched=1 if degree_title=="GED"&degree_matched==0
replace degree="BA" if regexm(degree_title, "SB")==1 &degree_matched==0
replace degree_matched=1 if regexm(degree_title, "SB")==1
replace degree="AA" if degree_title=="A"&degree_matched==0
replace degree_matched=1 if degree_title=="A"&degree_matched==0
replace degree="BA" if degree_title=="B"&degree_matched==0
replace degree_matched=1 if degree_title=="B"&degree_matched==0
replace degree="CERT" if  (degree_title=="CP"|degree_title=="CRT"|degree_title=="CT"|degree_title=="COOKING") &degree_matched==0
replace degree_matched=1 if  (degree_title=="CP"|degree_title=="CRT"|degree_title=="CT"|degree_title=="COOKING") &degree_matched==0

// remove records whose title says no degree was awarded
drop if  regexm(degree_title, "NO DEGREE")==1|regexm(degree_title, "NON DEGREE")==1

// list the titles that no rule matched, for manual review
tab degree_title if degree_matched==0, sort
tab degree_title years if degree_matched==0
 

// assign BA for 4-yr and AA for 2yr for those with no or with confusing degree title information
* this is only for the few that don't have degree info assigned above
* less than 1% of the sample
replace degree = "BA" if (years == 4) & degree==""
replace degree = "AA" if (years == 2) & degree==""
replace degree = "CERT" if (years == 1) & degree==""
// stop if any record is still without a degree group
assert !mi(degree)

// week in which the degree was awarded
gen degree_week = wofd(graduated_date) 
format *week %tw

********************************************************************************
********************************************************************************
********************************************************************************
********************************************************************************

*** GENERATE COMPLETIONS OUTCOMES ****

local degrees 			AA BA CERT GRAD
local schooltypes		public private MA out MA_public OOS_public MA_private OOS_private

// completion by degree type: the record is of that type and was awarded strictly
// before the end week of the by-year-y window
forvalues y = 1/$n_years_out {

	local horizon byY`y'

	foreach str of local degrees {
		gen cmp_`str'_`horizon' = (degree == "`str'") & (degree_week < ${week_end_`horizon'})
	}

	// any completion counts BA and AA only
	gen cmp_any_`horizon' = (cmp_BA_`horizon' | cmp_AA_`horizon' )

}

// BA / AA completion interacted with each institution category
forvalues y = 1/$n_years_out {
	local horizon byY`y'
	foreach schooltype of local schooltypes{
		foreach deg in BA AA {
			gen cmp_`deg'_`schooltype'_`horizon' = cmp_`deg'_`horizon'*college_`schooltype'
		}
	}
}

*SRC 9/6/2019
*duplicates due to NSC file
// drop file-specific fields and the college_* indicators so that records which
// differ only in those collapse into exact duplicates
drop grad_year NSCdatayear searchbegin college_*  collegeseq youruniq  //college name sometimes inconsistent
duplicates drop
*remaining dups seem to be two dates within same institution, or two institutions (multiple degrees)
*within institution, keep earliest degree


*by degree type
// earliest award date per student, institution and degree type
bys sasid ID_FSC degree: egen min=min(graduated_date)
keep if graduated_date==min
drop  min
// only AA and BA outcomes are carried forward from here
drop *CERT* *GRAD* 
drop if degree=="CERT"|degree=="GRAD"
duplicates drop 

*now only multi institution dups remain
*within degree type, take earliest degree
bys sasid degree:  egen min=min(graduated_date)
keep if graduated_date==min

*some are on same date -- randomly break ties
// NOTE(review): no seed is set in this part of the file; confirm one is set
// earlier, otherwise the draw differs between runs
sample 1, count by(sasid graduated_date degree)
*now all that remains in BA and AA institutions

// institution id, name and award date in separate columns per degree type
foreach d in AA BA{
	g ID_FSC_`d'=ID_FSC if degree=="`d'"
	g college_name_`d'=college if degree=="`d'"
	g graduated_date_`d'=graduated_date if degree=="`d'"
}

// ID_IP and ID_OPE are abbreviated variable names
drop ID_FSC ID_IPEDS ID_OPE college degree_title calendar years ownership degree degree_matched degree_week  graduated_date min
duplicates drop 

// copy the non-blank id / name from the student's next row onto the current row
// (blanks sort first within sasid), so AA and BA rows carry both sets of values
foreach v of varlist ID*A college*A{
sort sasid `v'
bys sasid : replace `v' = `v'[_n+1] if _n>=1 & `v'[_n+1]!=""
}
// same for the award dates: take the non-missing maximum within student
foreach v of varlist graduated_date*{
bys sasid : egen max =max(`v')
replace `v' = max
drop max
}
*these are people with both AA and BA
duplicates drop


// collapse to flat file 
// one row per student / cohort / institution combination; a completion flag is 1
// if it is 1 on any contributing row
collapse (max) cmp* , by(sasid cohort *BA *AA)

gen source_nsc = 1

*YOU CAN HAVE BOTH A BA AND AN AA
// save data
compress
save "$data_clean/flat_degrees_NSC_ma.dta", replace

}


